1*67e74705SXin Li // Test target codegen - host bc file has to be created first.
2*67e74705SXin Li // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3*67e74705SXin Li // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
4*67e74705SXin Li // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5*67e74705SXin Li // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6*67e74705SXin Li // expected-no-diagnostics
7*67e74705SXin Li #ifndef HEADER
8*67e74705SXin Li #define HEADER
9*67e74705SXin Li
10*67e74705SXin Li // CHECK-DAG: [[OMP_NT:@.+]] = common addrspace(3) global i32 0
11*67e74705SXin Li // CHECK-DAG: [[OMP_WID:@.+]] = common addrspace(3) global i64 0
12*67e74705SXin Li
13*67e74705SXin Li template<typename tx, typename ty>
14*67e74705SXin Li struct TT{
15*67e74705SXin Li tx X;
16*67e74705SXin Li ty Y;
17*67e74705SXin Li };
18*67e74705SXin Li
foo(int n)19*67e74705SXin Li int foo(int n) {
20*67e74705SXin Li int a = 0;
21*67e74705SXin Li short aa = 0;
22*67e74705SXin Li float b[10];
23*67e74705SXin Li float bn[n];
24*67e74705SXin Li double c[5][10];
25*67e74705SXin Li double cn[5][n];
26*67e74705SXin Li TT<long long, char> d;
27*67e74705SXin Li
28*67e74705SXin Li // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l86}}_worker()
29*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
30*67e74705SXin Li //
31*67e74705SXin Li // CHECK: [[AWAIT_WORK]]
32*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
33*67e74705SXin Li // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
34*67e74705SXin Li // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
35*67e74705SXin Li // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
36*67e74705SXin Li //
37*67e74705SXin Li // CHECK: [[SEL_WORKERS]]
38*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
39*67e74705SXin Li // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
40*67e74705SXin Li // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
41*67e74705SXin Li // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
42*67e74705SXin Li //
43*67e74705SXin Li // CHECK: [[EXEC_PARALLEL]]
44*67e74705SXin Li // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
45*67e74705SXin Li //
46*67e74705SXin Li // CHECK: [[TERM_PARALLEL]]
47*67e74705SXin Li // CHECK: br label {{%?}}[[BAR_PARALLEL]]
48*67e74705SXin Li //
49*67e74705SXin Li // CHECK: [[BAR_PARALLEL]]
50*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
51*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK]]
52*67e74705SXin Li //
53*67e74705SXin Li // CHECK: [[EXIT]]
54*67e74705SXin Li // CHECK: ret void
55*67e74705SXin Li
56*67e74705SXin Li // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l86]]()
57*67e74705SXin Li // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
58*67e74705SXin Li // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
59*67e74705SXin Li // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
60*67e74705SXin Li // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
61*67e74705SXin Li // CHECK: [[MID:%.+]] = and i32 [[B]],
62*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
63*67e74705SXin Li // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
64*67e74705SXin Li // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
65*67e74705SXin Li //
66*67e74705SXin Li // CHECK: [[CHECK_WORKER]]
67*67e74705SXin Li // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
68*67e74705SXin Li // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
69*67e74705SXin Li //
70*67e74705SXin Li // CHECK: [[WORKER]]
71*67e74705SXin Li // CHECK: call void [[T1]]_worker()
72*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
73*67e74705SXin Li //
74*67e74705SXin Li // CHECK: [[MASTER]]
75*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
76*67e74705SXin Li // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
77*67e74705SXin Li // CHECK: br label {{%?}}[[TERM:.+]]
78*67e74705SXin Li //
79*67e74705SXin Li // CHECK: [[TERM]]
80*67e74705SXin Li // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
81*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
82*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
83*67e74705SXin Li //
84*67e74705SXin Li // CHECK: [[EXIT]]
85*67e74705SXin Li // CHECK: ret void
86*67e74705SXin Li #pragma omp target
87*67e74705SXin Li {
88*67e74705SXin Li }
89*67e74705SXin Li
90*67e74705SXin Li // CHECK-NOT: define {{.*}}void [[T2:@__omp_offloading_.+foo.+]]_worker()
91*67e74705SXin Li #pragma omp target if(0)
92*67e74705SXin Li {
93*67e74705SXin Li }
94*67e74705SXin Li
95*67e74705SXin Li // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l157}}_worker()
96*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
97*67e74705SXin Li //
98*67e74705SXin Li // CHECK: [[AWAIT_WORK]]
99*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
100*67e74705SXin Li // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
101*67e74705SXin Li // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
102*67e74705SXin Li // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
103*67e74705SXin Li //
104*67e74705SXin Li // CHECK: [[SEL_WORKERS]]
105*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
106*67e74705SXin Li // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
107*67e74705SXin Li // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
108*67e74705SXin Li // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
109*67e74705SXin Li //
110*67e74705SXin Li // CHECK: [[EXEC_PARALLEL]]
111*67e74705SXin Li // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
112*67e74705SXin Li //
113*67e74705SXin Li // CHECK: [[TERM_PARALLEL]]
114*67e74705SXin Li // CHECK: br label {{%?}}[[BAR_PARALLEL]]
115*67e74705SXin Li //
116*67e74705SXin Li // CHECK: [[BAR_PARALLEL]]
117*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
118*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK]]
119*67e74705SXin Li //
120*67e74705SXin Li // CHECK: [[EXIT]]
121*67e74705SXin Li // CHECK: ret void
122*67e74705SXin Li
123*67e74705SXin Li // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l157]](i[[SZ:32|64]] [[ARG1:%.+]])
124*67e74705SXin Li // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]],
125*67e74705SXin Li // CHECK: store i[[SZ]] [[ARG1]], i[[SZ]]* [[AA_ADDR]],
126*67e74705SXin Li // CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
127*67e74705SXin Li // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
128*67e74705SXin Li // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
129*67e74705SXin Li // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
130*67e74705SXin Li // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
131*67e74705SXin Li // CHECK: [[MID:%.+]] = and i32 [[B]],
132*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
133*67e74705SXin Li // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
134*67e74705SXin Li // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
135*67e74705SXin Li //
136*67e74705SXin Li // CHECK: [[CHECK_WORKER]]
137*67e74705SXin Li // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
138*67e74705SXin Li // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
139*67e74705SXin Li //
140*67e74705SXin Li // CHECK: [[WORKER]]
141*67e74705SXin Li // CHECK: call void [[T3]]_worker()
142*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
143*67e74705SXin Li //
144*67e74705SXin Li // CHECK: [[MASTER]]
145*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
146*67e74705SXin Li // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
147*67e74705SXin Li // CHECK: load i16, i16* [[AA_CADDR]],
148*67e74705SXin Li // CHECK: br label {{%?}}[[TERM:.+]]
149*67e74705SXin Li //
150*67e74705SXin Li // CHECK: [[TERM]]
151*67e74705SXin Li // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
152*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
153*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
154*67e74705SXin Li //
155*67e74705SXin Li // CHECK: [[EXIT]]
156*67e74705SXin Li // CHECK: ret void
157*67e74705SXin Li #pragma omp target if(1)
158*67e74705SXin Li {
159*67e74705SXin Li aa += 1;
160*67e74705SXin Li }
161*67e74705SXin Li
162*67e74705SXin Li // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l260}}_worker()
163*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
164*67e74705SXin Li //
165*67e74705SXin Li // CHECK: [[AWAIT_WORK]]
166*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
167*67e74705SXin Li // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
168*67e74705SXin Li // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
169*67e74705SXin Li // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
170*67e74705SXin Li //
171*67e74705SXin Li // CHECK: [[SEL_WORKERS]]
172*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
173*67e74705SXin Li // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
174*67e74705SXin Li // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
175*67e74705SXin Li // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
176*67e74705SXin Li //
177*67e74705SXin Li // CHECK: [[EXEC_PARALLEL]]
178*67e74705SXin Li // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
179*67e74705SXin Li //
180*67e74705SXin Li // CHECK: [[TERM_PARALLEL]]
181*67e74705SXin Li // CHECK: br label {{%?}}[[BAR_PARALLEL]]
182*67e74705SXin Li //
183*67e74705SXin Li // CHECK: [[BAR_PARALLEL]]
184*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
185*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK]]
186*67e74705SXin Li //
187*67e74705SXin Li // CHECK: [[EXIT]]
188*67e74705SXin Li // CHECK: ret void
189*67e74705SXin Li
190*67e74705SXin Li // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+foo.+l260]](i[[SZ]]
191*67e74705SXin Li // Create local storage for each capture.
192*67e74705SXin Li // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
193*67e74705SXin Li // CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]*
194*67e74705SXin Li // CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
195*67e74705SXin Li // CHECK: [[LOCAL_BN:%.+]] = alloca float*
196*67e74705SXin Li // CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
197*67e74705SXin Li // CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
198*67e74705SXin Li // CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
199*67e74705SXin Li // CHECK: [[LOCAL_CN:%.+]] = alloca double*
200*67e74705SXin Li // CHECK: [[LOCAL_D:%.+]] = alloca [[TT:%.+]]*
201*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
202*67e74705SXin Li // CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
203*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
204*67e74705SXin Li // CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
205*67e74705SXin Li // CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
206*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
207*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
208*67e74705SXin Li // CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
209*67e74705SXin Li // CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
210*67e74705SXin Li //
211*67e74705SXin Li // CHECK-64-DAG: [[REF_A:%.+]] = bitcast i64* [[LOCAL_A]] to i32*
212*67e74705SXin Li // CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
213*67e74705SXin Li // CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
214*67e74705SXin Li // CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
215*67e74705SXin Li // CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
216*67e74705SXin Li // CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
217*67e74705SXin Li // CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
218*67e74705SXin Li // CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
219*67e74705SXin Li // CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
220*67e74705SXin Li //
221*67e74705SXin Li // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
222*67e74705SXin Li // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
223*67e74705SXin Li // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
224*67e74705SXin Li // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
225*67e74705SXin Li // CHECK: [[MID:%.+]] = and i32 [[B]],
226*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
227*67e74705SXin Li // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
228*67e74705SXin Li // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
229*67e74705SXin Li //
230*67e74705SXin Li // CHECK: [[CHECK_WORKER]]
231*67e74705SXin Li // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
232*67e74705SXin Li // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
233*67e74705SXin Li //
234*67e74705SXin Li // CHECK: [[WORKER]]
235*67e74705SXin Li // CHECK: call void [[T4]]_worker()
236*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
237*67e74705SXin Li //
238*67e74705SXin Li // CHECK: [[MASTER]]
239*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
240*67e74705SXin Li // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
241*67e74705SXin Li //
242*67e74705SXin Li // Use captures.
243*67e74705SXin Li // CHECK-64-DAG: load i32, i32* [[REF_A]]
244*67e74705SXin Li // CHECK-32-DAG: load i32, i32* [[LOCAL_A]]
245*67e74705SXin Li // CHECK-DAG: getelementptr inbounds [10 x float], [10 x float]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
246*67e74705SXin Li // CHECK-DAG: getelementptr inbounds float, float* [[REF_BN]], i[[SZ]] 3
247*67e74705SXin Li // CHECK-DAG: getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] 0, i[[SZ]] 1
248*67e74705SXin Li // CHECK-DAG: getelementptr inbounds double, double* [[REF_CN]], i[[SZ]] %{{.+}}
249*67e74705SXin Li // CHECK-DAG: getelementptr inbounds [[TT]], [[TT]]* [[REF_D]], i32 0, i32 0
250*67e74705SXin Li //
251*67e74705SXin Li // CHECK: br label {{%?}}[[TERM:.+]]
252*67e74705SXin Li //
253*67e74705SXin Li // CHECK: [[TERM]]
254*67e74705SXin Li // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
255*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
256*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
257*67e74705SXin Li //
258*67e74705SXin Li // CHECK: [[EXIT]]
259*67e74705SXin Li // CHECK: ret void
260*67e74705SXin Li #pragma omp target if(n>20)
261*67e74705SXin Li {
262*67e74705SXin Li a += 1;
263*67e74705SXin Li b[2] += 1.0;
264*67e74705SXin Li bn[3] += 1.0;
265*67e74705SXin Li c[1][2] += 1.0;
266*67e74705SXin Li cn[1][3] += 1.0;
267*67e74705SXin Li d.X += 1;
268*67e74705SXin Li d.Y += 1;
269*67e74705SXin Li }
270*67e74705SXin Li
271*67e74705SXin Li return a;
272*67e74705SXin Li }
273*67e74705SXin Li
274*67e74705SXin Li template<typename tx>
ftemplate(int n)275*67e74705SXin Li tx ftemplate(int n) {
276*67e74705SXin Li tx a = 0;
277*67e74705SXin Li short aa = 0;
278*67e74705SXin Li tx b[10];
279*67e74705SXin Li
280*67e74705SXin Li #pragma omp target if(n>40)
281*67e74705SXin Li {
282*67e74705SXin Li a += 1;
283*67e74705SXin Li aa += 1;
284*67e74705SXin Li b[2] += 1;
285*67e74705SXin Li }
286*67e74705SXin Li
287*67e74705SXin Li return a;
288*67e74705SXin Li }
289*67e74705SXin Li
290*67e74705SXin Li static
fstatic(int n)291*67e74705SXin Li int fstatic(int n) {
292*67e74705SXin Li int a = 0;
293*67e74705SXin Li short aa = 0;
294*67e74705SXin Li char aaa = 0;
295*67e74705SXin Li int b[10];
296*67e74705SXin Li
297*67e74705SXin Li #pragma omp target if(n>50)
298*67e74705SXin Li {
299*67e74705SXin Li a += 1;
300*67e74705SXin Li aa += 1;
301*67e74705SXin Li aaa += 1;
302*67e74705SXin Li b[2] += 1;
303*67e74705SXin Li }
304*67e74705SXin Li
305*67e74705SXin Li return a;
306*67e74705SXin Li }
307*67e74705SXin Li
308*67e74705SXin Li struct S1 {
309*67e74705SXin Li double a;
310*67e74705SXin Li
r1S1311*67e74705SXin Li int r1(int n){
312*67e74705SXin Li int b = n+1;
313*67e74705SXin Li short int c[2][n];
314*67e74705SXin Li
315*67e74705SXin Li #pragma omp target if(n>60)
316*67e74705SXin Li {
317*67e74705SXin Li this->a = (double)b + 1.5;
318*67e74705SXin Li c[1][1] = ++a;
319*67e74705SXin Li }
320*67e74705SXin Li
321*67e74705SXin Li return c[1][1] + (int)b;
322*67e74705SXin Li }
323*67e74705SXin Li };
324*67e74705SXin Li
bar(int n)325*67e74705SXin Li int bar(int n){
326*67e74705SXin Li int a = 0;
327*67e74705SXin Li
328*67e74705SXin Li a += foo(n);
329*67e74705SXin Li
330*67e74705SXin Li S1 S;
331*67e74705SXin Li a += S.r1(n);
332*67e74705SXin Li
333*67e74705SXin Li a += fstatic(n);
334*67e74705SXin Li
335*67e74705SXin Li a += ftemplate<int>(n);
336*67e74705SXin Li
337*67e74705SXin Li return a;
338*67e74705SXin Li }
339*67e74705SXin Li
340*67e74705SXin Li // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+l297}}_worker()
341*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
342*67e74705SXin Li //
343*67e74705SXin Li // CHECK: [[AWAIT_WORK]]
344*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
345*67e74705SXin Li // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
346*67e74705SXin Li // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
347*67e74705SXin Li // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
348*67e74705SXin Li //
349*67e74705SXin Li // CHECK: [[SEL_WORKERS]]
350*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
351*67e74705SXin Li // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
352*67e74705SXin Li // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
353*67e74705SXin Li // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
354*67e74705SXin Li //
355*67e74705SXin Li // CHECK: [[EXEC_PARALLEL]]
356*67e74705SXin Li // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
357*67e74705SXin Li //
358*67e74705SXin Li // CHECK: [[TERM_PARALLEL]]
359*67e74705SXin Li // CHECK: br label {{%?}}[[BAR_PARALLEL]]
360*67e74705SXin Li //
361*67e74705SXin Li // CHECK: [[BAR_PARALLEL]]
362*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
363*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK]]
364*67e74705SXin Li //
365*67e74705SXin Li // CHECK: [[EXIT]]
366*67e74705SXin Li // CHECK: ret void
367*67e74705SXin Li
368*67e74705SXin Li // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+static.+l297]](i[[SZ]]
369*67e74705SXin Li // Create local storage for each capture.
370*67e74705SXin Li // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
371*67e74705SXin Li // CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
372*67e74705SXin Li // CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
373*67e74705SXin Li // CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]*
374*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
375*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
376*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
377*67e74705SXin Li // CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
378*67e74705SXin Li // Store captures in the context.
379*67e74705SXin Li // CHECK-64-DAG: [[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
380*67e74705SXin Li // CHECK-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
381*67e74705SXin Li // CHECK-DAG: [[REF_AAA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
382*67e74705SXin Li // CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
383*67e74705SXin Li //
384*67e74705SXin Li // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
385*67e74705SXin Li // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
386*67e74705SXin Li // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
387*67e74705SXin Li // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
388*67e74705SXin Li // CHECK: [[MID:%.+]] = and i32 [[B]],
389*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
390*67e74705SXin Li // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
391*67e74705SXin Li // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
392*67e74705SXin Li //
393*67e74705SXin Li // CHECK: [[CHECK_WORKER]]
394*67e74705SXin Li // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
395*67e74705SXin Li // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
396*67e74705SXin Li //
397*67e74705SXin Li // CHECK: [[WORKER]]
398*67e74705SXin Li // CHECK: call void [[T5]]_worker()
399*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
400*67e74705SXin Li //
401*67e74705SXin Li // CHECK: [[MASTER]]
402*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
403*67e74705SXin Li // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
404*67e74705SXin Li //
405*67e74705SXin Li // CHECK-64-DAG: load i32, i32* [[REF_A]]
406*67e74705SXin Li // CHECK-32-DAG: load i32, i32* [[LOCAL_A]]
407*67e74705SXin Li // CHECK-DAG: load i16, i16* [[REF_AA]]
408*67e74705SXin Li // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
409*67e74705SXin Li //
410*67e74705SXin Li // CHECK: br label {{%?}}[[TERM:.+]]
411*67e74705SXin Li //
412*67e74705SXin Li // CHECK: [[TERM]]
413*67e74705SXin Li // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
414*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
415*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
416*67e74705SXin Li //
417*67e74705SXin Li // CHECK: [[EXIT]]
418*67e74705SXin Li // CHECK: ret void
419*67e74705SXin Li
420*67e74705SXin Li
421*67e74705SXin Li
422*67e74705SXin Li // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l315}}_worker()
423*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
424*67e74705SXin Li //
425*67e74705SXin Li // CHECK: [[AWAIT_WORK]]
426*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
427*67e74705SXin Li // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
428*67e74705SXin Li // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
429*67e74705SXin Li // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
430*67e74705SXin Li //
431*67e74705SXin Li // CHECK: [[SEL_WORKERS]]
432*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
433*67e74705SXin Li // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
434*67e74705SXin Li // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
435*67e74705SXin Li // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
436*67e74705SXin Li //
437*67e74705SXin Li // CHECK: [[EXEC_PARALLEL]]
438*67e74705SXin Li // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
439*67e74705SXin Li //
440*67e74705SXin Li // CHECK: [[TERM_PARALLEL]]
441*67e74705SXin Li // CHECK: br label {{%?}}[[BAR_PARALLEL]]
442*67e74705SXin Li //
443*67e74705SXin Li // CHECK: [[BAR_PARALLEL]]
444*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
445*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK]]
446*67e74705SXin Li //
447*67e74705SXin Li // CHECK: [[EXIT]]
448*67e74705SXin Li // CHECK: ret void
449*67e74705SXin Li
450*67e74705SXin Li // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+S1.+l315]](
451*67e74705SXin Li // Create local storage for each capture.
452*67e74705SXin Li // CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1:%struct.*]]*
453*67e74705SXin Li // CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]]
454*67e74705SXin Li // CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
455*67e74705SXin Li // CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
456*67e74705SXin Li // CHECK: [[LOCAL_C:%.+]] = alloca i16*
457*67e74705SXin Li // CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
458*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
459*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
460*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
461*67e74705SXin Li // CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
462*67e74705SXin Li // Store captures in the context.
463*67e74705SXin Li // CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
464*67e74705SXin Li // CHECK-64-DAG:[[REF_B:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
465*67e74705SXin Li // CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
466*67e74705SXin Li // CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
467*67e74705SXin Li // CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
468*67e74705SXin Li // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
469*67e74705SXin Li // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
470*67e74705SXin Li // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
471*67e74705SXin Li // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
472*67e74705SXin Li // CHECK: [[MID:%.+]] = and i32 [[B]],
473*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
474*67e74705SXin Li // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
475*67e74705SXin Li // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
476*67e74705SXin Li //
477*67e74705SXin Li // CHECK: [[CHECK_WORKER]]
478*67e74705SXin Li // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
479*67e74705SXin Li // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
480*67e74705SXin Li //
481*67e74705SXin Li // CHECK: [[WORKER]]
482*67e74705SXin Li // CHECK: call void [[T6]]_worker()
483*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
484*67e74705SXin Li //
485*67e74705SXin Li // CHECK: [[MASTER]]
486*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
487*67e74705SXin Li // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
488*67e74705SXin Li // Use captures.
489*67e74705SXin Li // CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0
490*67e74705SXin Li // CHECK-64-DAG:load i32, i32* [[REF_B]]
491*67e74705SXin Li // CHECK-32-DAG:load i32, i32* [[LOCAL_B]]
492*67e74705SXin Li // CHECK-DAG: getelementptr inbounds i16, i16* [[REF_C]], i[[SZ]] %{{.+}}
493*67e74705SXin Li // CHECK: br label {{%?}}[[TERM:.+]]
494*67e74705SXin Li //
495*67e74705SXin Li // CHECK: [[TERM]]
496*67e74705SXin Li // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
497*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
498*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
499*67e74705SXin Li //
500*67e74705SXin Li // CHECK: [[EXIT]]
501*67e74705SXin Li // CHECK: ret void
502*67e74705SXin Li
503*67e74705SXin Li
504*67e74705SXin Li
505*67e74705SXin Li // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l280}}_worker()
506*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
507*67e74705SXin Li //
508*67e74705SXin Li // CHECK: [[AWAIT_WORK]]
509*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
510*67e74705SXin Li // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
511*67e74705SXin Li // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
512*67e74705SXin Li // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
513*67e74705SXin Li //
514*67e74705SXin Li // CHECK: [[SEL_WORKERS]]
515*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
516*67e74705SXin Li // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
517*67e74705SXin Li // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
518*67e74705SXin Li // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
519*67e74705SXin Li //
520*67e74705SXin Li // CHECK: [[EXEC_PARALLEL]]
521*67e74705SXin Li // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]]
522*67e74705SXin Li //
523*67e74705SXin Li // CHECK: [[TERM_PARALLEL]]
524*67e74705SXin Li // CHECK: br label {{%?}}[[BAR_PARALLEL]]
525*67e74705SXin Li //
526*67e74705SXin Li // CHECK: [[BAR_PARALLEL]]
527*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
528*67e74705SXin Li // CHECK: br label {{%?}}[[AWAIT_WORK]]
529*67e74705SXin Li //
530*67e74705SXin Li // CHECK: [[EXIT]]
531*67e74705SXin Li // CHECK: ret void
532*67e74705SXin Li
533*67e74705SXin Li // CHECK: define {{.*}}void [[T7:@__omp_offloading_.+template.+l280]](i[[SZ]]
534*67e74705SXin Li // Create local storage for each capture.
535*67e74705SXin Li // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
536*67e74705SXin Li // CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
537*67e74705SXin Li // CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]*
538*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
539*67e74705SXin Li // CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
540*67e74705SXin Li // CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
541*67e74705SXin Li // Store captures in the context.
542*67e74705SXin Li // CHECK-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
543*67e74705SXin Li // CHECK-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
544*67e74705SXin Li // CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
545*67e74705SXin Li //
546*67e74705SXin Li // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
547*67e74705SXin Li // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
548*67e74705SXin Li // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
549*67e74705SXin Li // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
550*67e74705SXin Li // CHECK: [[MID:%.+]] = and i32 [[B]],
551*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
552*67e74705SXin Li // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
553*67e74705SXin Li // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
554*67e74705SXin Li //
555*67e74705SXin Li // CHECK: [[CHECK_WORKER]]
556*67e74705SXin Li // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
557*67e74705SXin Li // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
558*67e74705SXin Li //
559*67e74705SXin Li // CHECK: [[WORKER]]
560*67e74705SXin Li // CHECK: call void [[T7]]_worker()
561*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
562*67e74705SXin Li //
563*67e74705SXin Li // CHECK: [[MASTER]]
564*67e74705SXin Li // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
565*67e74705SXin Li // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
566*67e74705SXin Li //
567*67e74705SXin Li // CHECK-64-DAG: load i32, i32* [[REF_A]]
568*67e74705SXin Li // CHECK-32-DAG: load i32, i32* [[LOCAL_A]]
569*67e74705SXin Li // CHECK-DAG: load i16, i16* [[REF_AA]]
570*67e74705SXin Li // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
571*67e74705SXin Li //
572*67e74705SXin Li // CHECK: br label {{%?}}[[TERM:.+]]
573*67e74705SXin Li //
574*67e74705SXin Li // CHECK: [[TERM]]
575*67e74705SXin Li // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
576*67e74705SXin Li // CHECK: call void @llvm.nvvm.barrier0()
577*67e74705SXin Li // CHECK: br label {{%?}}[[EXIT]]
578*67e74705SXin Li //
579*67e74705SXin Li // CHECK: [[EXIT]]
580*67e74705SXin Li // CHECK: ret void
581*67e74705SXin Li #endif
582