1*67e74705SXin Li // RUN: echo "GPU binary would be here" > %t
2*67e74705SXin Li // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck %s
3*67e74705SXin Li // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
4*67e74705SXin Li // RUN: | FileCheck %s -check-prefix=NOGLOBALS
5*67e74705SXin Li // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
6*67e74705SXin Li
7*67e74705SXin Li #include "Inputs/cuda.h"
8*67e74705SXin Li
9*67e74705SXin Li #ifndef NOGLOBALS
10*67e74705SXin Li // CHECK-DAG: @device_var = internal global i32
11*67e74705SXin Li __device__ int device_var;
12*67e74705SXin Li
13*67e74705SXin Li // CHECK-DAG: @constant_var = internal global i32
14*67e74705SXin Li __constant__ int constant_var;
15*67e74705SXin Li
16*67e74705SXin Li // CHECK-DAG: @shared_var = internal global i32
17*67e74705SXin Li __shared__ int shared_var;
18*67e74705SXin Li
19*67e74705SXin Li // Make sure host globals don't get internalized...
20*67e74705SXin Li // CHECK-DAG: @host_var = global i32
21*67e74705SXin Li int host_var;
22*67e74705SXin Li // ... and that extern vars remain external.
23*67e74705SXin Li // CHECK-DAG: @ext_host_var = external global i32
24*67e74705SXin Li extern int ext_host_var;
25*67e74705SXin Li
26*67e74705SXin Li // Shadows for external device-side variables are *definitions* of
27*67e74705SXin Li // those variables.
28*67e74705SXin Li // CHECK-DAG: @ext_device_var = internal global i32
29*67e74705SXin Li extern __device__ int ext_device_var;
30*67e74705SXin Li // CHECK-DAG: @ext_device_var = internal global i32
31*67e74705SXin Li extern __constant__ int ext_constant_var;
32*67e74705SXin Li
use_pointers()33*67e74705SXin Li void use_pointers() {
34*67e74705SXin Li int *p;
35*67e74705SXin Li p = &device_var;
36*67e74705SXin Li p = &constant_var;
37*67e74705SXin Li p = &shared_var;
38*67e74705SXin Li p = &host_var;
39*67e74705SXin Li p = &ext_device_var;
40*67e74705SXin Li p = &ext_constant_var;
41*67e74705SXin Li p = &ext_host_var;
42*67e74705SXin Li }
43*67e74705SXin Li
44*67e74705SXin Li // Make sure that all parts of GPU code init/cleanup are there:
45*67e74705SXin Li // * constant unnamed string with the kernel name
46*67e74705SXin Li // CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
47*67e74705SXin Li // * constant unnamed string with GPU binary
48*67e74705SXin Li // CHECK: private unnamed_addr constant{{.*}}\00"
49*67e74705SXin Li // * constant struct that wraps GPU binary
50*67e74705SXin Li // CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* }
51*67e74705SXin Li // CHECK: { i32 1180844977, i32 1, {{.*}}, i8* null }
52*67e74705SXin Li // * variable to save GPU binary handle after initialization
53*67e74705SXin Li // CHECK: @__cuda_gpubin_handle = internal global i8** null
54*67e74705SXin Li // * Make sure our constructor/destructor was added to global ctor/dtor list.
55*67e74705SXin Li // CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
56*67e74705SXin Li // CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor
57*67e74705SXin Li
58*67e74705SXin Li // Test that we build the correct number of calls to cudaSetupArgument followed
59*67e74705SXin Li // by a call to cudaLaunch.
60*67e74705SXin Li
61*67e74705SXin Li // CHECK: define{{.*}}kernelfunc
62*67e74705SXin Li // CHECK: call{{.*}}cudaSetupArgument
63*67e74705SXin Li // CHECK: call{{.*}}cudaSetupArgument
64*67e74705SXin Li // CHECK: call{{.*}}cudaSetupArgument
65*67e74705SXin Li // CHECK: call{{.*}}cudaLaunch
kernelfunc(int i,int j,int k)66*67e74705SXin Li __global__ void kernelfunc(int i, int j, int k) {}
67*67e74705SXin Li
68*67e74705SXin Li // Test that we've built correct kernel launch sequence.
69*67e74705SXin Li // CHECK: define{{.*}}hostfunc
70*67e74705SXin Li // CHECK: call{{.*}}cudaConfigureCall
71*67e74705SXin Li // CHECK: call{{.*}}kernelfunc
hostfunc(void)72*67e74705SXin Li void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
73*67e74705SXin Li #endif
74*67e74705SXin Li
75*67e74705SXin Li // Test that we've built a function to register kernels and global vars.
76*67e74705SXin Li // CHECK: define internal void @__cuda_register_globals
77*67e74705SXin Li // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
78*67e74705SXin Li // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
79*67e74705SXin Li // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
80*67e74705SXin Li // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
81*67e74705SXin Li // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
82*67e74705SXin Li // CHECK: ret void
83*67e74705SXin Li
84*67e74705SXin Li // Test that we've built contructor..
85*67e74705SXin Li // CHECK: define internal void @__cuda_module_ctor
86*67e74705SXin Li // .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper)
87*67e74705SXin Li // CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
88*67e74705SXin Li // .. stores return value in __cuda_gpubin_handle
89*67e74705SXin Li // CHECK-NEXT: store{{.*}}__cuda_gpubin_handle
90*67e74705SXin Li // .. and then calls __cuda_register_globals
91*67e74705SXin Li // CHECK-NEXT: call void @__cuda_register_globals
92*67e74705SXin Li
93*67e74705SXin Li // Test that we've created destructor.
94*67e74705SXin Li // CHECK: define internal void @__cuda_module_dtor
95*67e74705SXin Li // CHECK: load{{.*}}__cuda_gpubin_handle
96*67e74705SXin Li // CHECK-NEXT: call void @__cudaUnregisterFatBinary
97*67e74705SXin Li
98*67e74705SXin Li // There should be no __cuda_register_globals if we have no
99*67e74705SXin Li // device-side globals, but we still need to register GPU binary.
100*67e74705SXin Li // Skip GPU binary string first.
101*67e74705SXin Li // NOGLOBALS: @0 = private unnamed_addr constant{{.*}}
102*67e74705SXin Li // NOGLOBALS-NOT: define internal void @__cuda_register_globals
103*67e74705SXin Li // NOGLOBALS: define internal void @__cuda_module_ctor
104*67e74705SXin Li // NOGLOBALS: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
105*67e74705SXin Li // NOGLOBALS-NOT: call void @__cuda_register_globals
106*67e74705SXin Li // NOGLOBALS: define internal void @__cuda_module_dtor
107*67e74705SXin Li // NOGLOBALS: call void @__cudaUnregisterFatBinary
108*67e74705SXin Li
109*67e74705SXin Li // There should be no constructors/destructors if we have no GPU binary.
110*67e74705SXin Li // NOGPUBIN-NOT: define internal void @__cuda_register_globals
111*67e74705SXin Li // NOGPUBIN-NOT: define internal void @__cuda_module_ctor
112*67e74705SXin Li // NOGPUBIN-NOT: define internal void @__cuda_module_dtor
113