xref: /aosp_15_r20/external/clang/test/OpenMP/nvptx_teams_codegen.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li // Test target codegen - host bc file has to be created first.
2*67e74705SXin Li // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
3*67e74705SXin Li // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
4*67e74705SXin Li // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
5*67e74705SXin Li // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
6*67e74705SXin Li // expected-no-diagnostics
7*67e74705SXin Li #ifndef HEADER
8*67e74705SXin Li #define HEADER
9*67e74705SXin Li 
10*67e74705SXin Li #ifdef CK1
11*67e74705SXin Li 
12*67e74705SXin Li template <typename T>
tmain(T argc)13*67e74705SXin Li int tmain(T argc) {
14*67e74705SXin Li #pragma omp target
15*67e74705SXin Li #pragma omp teams
16*67e74705SXin Li   argc = 0;
17*67e74705SXin Li   return 0;
18*67e74705SXin Li }
19*67e74705SXin Li 
20*67e74705SXin Li 
main(int argc,char ** argv)21*67e74705SXin Li int main (int argc, char **argv) {
22*67e74705SXin Li #pragma omp target
23*67e74705SXin Li #pragma omp teams
24*67e74705SXin Li   {
25*67e74705SXin Li   argc = 0;
26*67e74705SXin Li   }
27*67e74705SXin Li   return tmain(argv);
28*67e74705SXin Li }
29*67e74705SXin Li 
30*67e74705SXin Li // only nvptx side: do not outline teams region and do not call fork_teams
31*67e74705SXin Li // CK1:  define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[ARGC:%.+]])
32*67e74705SXin Li // CK1:  {{.+}} = alloca i{{[0-9]+}}*,
33*67e74705SXin Li // CK1:  {{.+}} = alloca i{{[0-9]+}}*,
34*67e74705SXin Li // CK1:  [[ARGCADDR_PTR:%.+]] = alloca i{{[0-9]+}}*,
35*67e74705SXin Li // CK1:  [[ARGCADDR:%.+]] = alloca i{{[0-9]+}},
36*67e74705SXin Li // CK1:  store {{.+}} 0, {{.+}},
37*67e74705SXin Li // CK1:  store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
38*67e74705SXin Li // CK1-64:  [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
39*67e74705SXin Li // CK1-64:  store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
40*67e74705SXin Li // CK1-32:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
41*67e74705SXin Li // CK1:  [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
42*67e74705SXin Li // CK1:  store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
43*67e74705SXin Li // CK1-NOT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
44*67e74705SXin Li // CK1:  ret void
45*67e74705SXin Li // CK1-NEXT: }
46*67e74705SXin Li 
47*67e74705SXin Li // target region in template
48*67e74705SXin Li // CK1: define {{.*}}void @{{[^,]+}}(i{{.+}}** [[ARGC:%.+]])
49*67e74705SXin Li // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
50*67e74705SXin Li // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
51*67e74705SXin Li // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
52*67e74705SXin Li // CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]],
53*67e74705SXin Li // CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]],
54*67e74705SXin Li // CK1: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],
55*67e74705SXin Li // CK1-NOT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
56*67e74705SXin Li // CK1:  ret void
57*67e74705SXin Li // CK1-NEXT: }
58*67e74705SXin Li 
59*67e74705SXin Li 
60*67e74705SXin Li #endif // CK1
61*67e74705SXin Li 
62*67e74705SXin Li // Test target codegen - host bc file has to be created first.
63*67e74705SXin Li // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
64*67e74705SXin Li // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
65*67e74705SXin Li // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
66*67e74705SXin Li // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
67*67e74705SXin Li // expected-no-diagnostics
68*67e74705SXin Li #ifdef CK2
69*67e74705SXin Li 
70*67e74705SXin Li template <typename T>
tmain(T argc)71*67e74705SXin Li int tmain(T argc) {
72*67e74705SXin Li   int a = 10;
73*67e74705SXin Li   int b = 5;
74*67e74705SXin Li #pragma omp target
75*67e74705SXin Li #pragma omp teams num_teams(a) thread_limit(b)
76*67e74705SXin Li   {
77*67e74705SXin Li   argc = 0;
78*67e74705SXin Li   }
79*67e74705SXin Li   return 0;
80*67e74705SXin Li }
81*67e74705SXin Li 
main(int argc,char ** argv)82*67e74705SXin Li int main (int argc, char **argv) {
83*67e74705SXin Li   int a = 20;
84*67e74705SXin Li   int b = 5;
85*67e74705SXin Li #pragma omp target
86*67e74705SXin Li #pragma omp teams num_teams(a) thread_limit(b)
87*67e74705SXin Li   {
88*67e74705SXin Li   argc = 0;
89*67e74705SXin Li   }
90*67e74705SXin Li   return tmain(argv);
91*67e74705SXin Li }
92*67e74705SXin Li 
93*67e74705SXin Li // CK2: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[ARGC_IN:.+]])
94*67e74705SXin Li // CK2: {{.}} = alloca i{{[0-9]+}}*,
95*67e74705SXin Li // CK2: {{.}} = alloca i{{[0-9]+}}*,
96*67e74705SXin Li // CK2: [[ARGCADDR_PTR:%.+]] = alloca i{{[0-9]+}}*,
97*67e74705SXin Li // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
98*67e74705SXin Li // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
99*67e74705SXin Li // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}},
100*67e74705SXin Li // CK2-NOT:  {{%.+}} = call i32 @__kmpc_global_thread_num(
101*67e74705SXin Li // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
102*67e74705SXin Li // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
103*67e74705SXin Li // CK2: store i{{[0-9]+}} [[ARGC_IN]], i{{[0-9]+}}* [[ARGCADDR]],
104*67e74705SXin Li // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
105*67e74705SXin Li // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
106*67e74705SXin Li // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
107*67e74705SXin Li // CK2-64:  store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
108*67e74705SXin Li // CK2-32:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
109*67e74705SXin Li // CK2:  [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
110*67e74705SXin Li // CK2: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
111*67e74705SXin Li // CK2-NOT:  {{.+}} = call i32 @__kmpc_push_num_teams(
112*67e74705SXin Li // CK2-NOT:  call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
113*67e74705SXin Li // CK2: ret
114*67e74705SXin Li 
115*67e74705SXin Li // CK2: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[BP:%.+]], i{{[0-9]+}}** [[ARGC:%.+]])
116*67e74705SXin Li // CK2: [[ARGCADDR_PTR:%.+]] = alloca i{{[0-9]+}}***,
117*67e74705SXin Li // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
118*67e74705SXin Li // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
119*67e74705SXin Li // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}**,
120*67e74705SXin Li // CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num(
121*67e74705SXin Li // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
122*67e74705SXin Li // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
123*67e74705SXin Li // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
124*67e74705SXin Li // CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]],
125*67e74705SXin Li // CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}***, i{{[0-9]+}}**** [[ARGCADDR_PTR]],
126*67e74705SXin Li // CK2: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],
127*67e74705SXin Li // CK2-NOT: {{.+}} = call i32 @__kmpc_push_num_teams(
128*67e74705SXin Li // CK2-NOT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
129*67e74705SXin Li // CK2:  ret void
130*67e74705SXin Li 
131*67e74705SXin Li #endif // CK2
132*67e74705SXin Li #endif
133