; xref: /aosp_15_r20/external/llvm/test/CodeGen/NVPTX/access-non-generic.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; The first three pipelines run llc and verify the emitted code with the PTX
; check prefix; the last of them additionally passes -nvptx-use-infer-addrspace.
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix PTX
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-use-infer-addrspace | FileCheck %s --check-prefix PTX
; The two opt pipelines verify the transformed module with the IR check prefix.
; Only the -nvptx-infer-addrspace pipeline also enables the IR-WITH-LOOP prefix,
; so directives using that prefix are skipped for -nvptx-favor-non-generic.
; RUN: opt < %s -S -nvptx-favor-non-generic -dce | FileCheck %s --check-prefix IR
; RUN: opt < %s -S -nvptx-infer-addrspace | FileCheck %s --check-prefix IR --check-prefix IR-WITH-LOOP

; @array and @scalar live in addrspace(3); the checks in this file expect
; accesses to them to be emitted as ld.shared / st.shared.
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
@scalar = internal addrspace(3) global float 0.000000e+00, align 4
; @generic_scalar carries no addrspace qualifier, i.e. it is in the default
; (generic) address space.
@generic_scalar = internal global float 0.000000e+00, align 4

; Casts the generic-space global @generic_scalar to an addrspace(3) pointer
; and loads through it (the reverse direction of the casts used in the rest of
; this file).
;
; No check directive targets this function: it precedes the first LABEL
; directive of every prefix, so it only has to get through each pipeline.
; NOTE(review): presumably a crash/regression guard -- confirm against the
; commit that introduced it.
define float @ld_from_shared() {
  %1 = addrspacecast float* @generic_scalar to float addrspace(3)*
  %2 = load float, float addrspace(3)* %1
  ret float %2
}

; Verifies nvptx-favor-non-generic correctly optimizes generic address space
; usage to non-generic address space usage for the patterns we claim to handle:
; 1. load cast
; 2. store cast
; 3. load gep cast
; 4. store gep cast
; gep and cast can be an instruction or a constant expression. This function
; tries all possible combinations.
;
; Expectations: the opt output for this function must contain no addrspacecast
; at all, and every access in the llc output must be an ld.shared / st.shared
; on @scalar or @array. Each group of accesses ends with a barrier0 call whose
; bar.sync is checked as well, which pins the order of the groups.
define void @ld_st_shared_f32(i32 %i, float %v) {
; IR-LABEL: @ld_st_shared_f32(
; IR-NOT: addrspacecast
; PTX-LABEL: ld_st_shared_f32(
  ; load cast
  %1 = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
  call void @use(float %1)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
  ; store cast
  store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
  ; use syncthreads to disable optimizations across components
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; cast; load
  ; Same accesses as above, but the cast is an instruction (%2) rather than a
  ; constant expression.
  %2 = addrspacecast float addrspace(3)* @scalar to float*
  %3 = load float, float* %2, align 4
  call void @use(float %3)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
  ; cast; store
  store float %v, float* %2, align 4
; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; load gep cast
  ; Element 5 of a [10 x float] array: byte offset 5 * 4 = 20, hence [array+20].
  %4 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
  call void @use(float %4)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
  ; store gep cast
  store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; gep cast; load
  ; The gep is an instruction (%5); the cast inside it is still a constant
  ; expression.
  %5 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
  %6 = load float, float* %5, align 4
  call void @use(float %6)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
  ; gep cast; store
  store float %v, float* %5, align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; cast; gep; load
  ; Both cast and gep are instructions and the index is the runtime value %i,
  ; so the address is expected in a register instead of as [array+N].
  %7 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
  %8 = getelementptr inbounds [10 x float], [10 x float]* %7, i32 0, i32 %i
  %9 = load float, float* %8, align 4
  call void @use(float %9)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [%{{(r|rl|rd)[0-9]+}}];
  ; cast; gep; store
  store float %v, float* %8, align 4
; PTX: st.shared.f32 [%{{(r|rl|rd)[0-9]+}}], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ret void
}

; When hoisting an addrspacecast between different pointer types, replace the
; addrspacecast with a bitcast.
;
; The input loads an i32 through a generic i32* obtained by casting the
; addrspace(3) float @scalar. The opt output must load straight from an
; addrspace(3) i32 pointer formed with a bitcast, and the llc output must use
; an ld.shared of an unsigned 32- or 64-bit type.
define i32 @ld_int_from_float() {
; IR-LABEL: @ld_int_from_float(
; IR: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
; PTX-LABEL: ld_int_from_float(
; PTX: ld.shared.u{{(32|64)}}
  %1 = load i32, i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
  ret i32 %1
}

; Follows an addrspace(1) pointer argument through a chain of
; addrspacecast -> gep -> gep -> bitcast -> load. The NEXT directives require
; the opt output to start directly with the first gep on %input (i.e. the
; addrspacecast is gone) and every later instruction in the chain to operate on
; addrspace(1) pointers. The llc output must contain an ld.global.
define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) {
; IR-LABEL: @ld_int_from_global_float(
; PTX-LABEL: ld_int_from_global_float(
  %1 = addrspacecast float addrspace(1)* %input to float*
  %2 = getelementptr float, float* %1, i32 %i
; IR-NEXT: getelementptr float, float addrspace(1)* %input, i32 %i
  %3 = getelementptr float, float* %2, i32 %j
; IR-NEXT: getelementptr float, float addrspace(1)* {{%[^,]+}}, i32 %j
  %4 = bitcast float* %3 to i32*
; IR-NEXT: bitcast float addrspace(1)* {{%[^ ]+}} to i32 addrspace(1)*
  %5 = load i32, i32* %4
; IR-NEXT: load i32, i32 addrspace(1)* {{%.+}}
; A plain check, not a LABEL: the mnemonic is an instruction inside this
; function's block, not a unique marker that should partition the output.
; PTX: ld.global
  ret i32 %5
}

; The store address is a three-level constant expression:
; bitcast(gep(addrspacecast(@array), 0, 1)). Only the llc output is checked;
; the store must become an st.shared to [array+4] (element 1 * 4 bytes).
define void @nested_const_expr() {
; PTX-LABEL: nested_const_expr(
  ; store 1 to bitcast(gep(addrspacecast(array), 0, 1))
  store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4
; The register holding the constant is captured rather than spelled out, so the
; check does not depend on register numbering while still tying the stored
; value to the mov.
; PTX: mov.u32 [[ONE:%r[0-9]+]], 1;
; PTX-NEXT: st.shared.u32 [array+4], [[ONE]];
  ret void
}

; One gep result (%addr) derived from a generic cast of an addrspace(1)
; argument is used by both a load and a store. The NEXT directives require the
; opt output to be exactly gep / load / store / ret on addrspace(1) pointers,
; with the original value names kept and the addrspacecast removed.
; NOTE(review): the name presumably refers to replace-all-uses-with -- confirm.
define void @rauw(float addrspace(1)* %input) {
  %generic_input = addrspacecast float addrspace(1)* %input to float*
  %addr = getelementptr float, float* %generic_input, i64 10
  %v = load float, float* %addr
  store float %v, float* %addr
  ret void
; IR-LABEL: @rauw(
; IR-NEXT: %addr = getelementptr float, float addrspace(1)* %input, i64 10
; IR-NEXT: %v = load float, float addrspace(1)* %addr
; IR-NEXT: store float %v, float addrspace(1)* %addr
; IR-NEXT: ret void
}

; Walks @array through a pointer induction variable (%i) that starts from a
; generic cast of the addrspace(3) array. Checked only under the IR-WITH-LOOP
; prefix: the phi, the load and the increment gep must all be rewritten to
; addrspace(3) pointers. The exit comparison against %end is not checked here.
define void @loop() {
; IR-WITH-LOOP-LABEL: @loop(
entry:
  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
  %end = getelementptr float, float* %p, i64 10
  br label %loop

loop:
  ; %i is fed by the cast result on entry and by its own increment afterwards.
  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
; IR-WITH-LOOP: phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
  %v = load float, float* %i
; IR-WITH-LOOP: %v = load float, float addrspace(3)* %i
  call void @use(float %v)
  %i2 = getelementptr float, float* %i, i64 1
; IR-WITH-LOOP: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
  %exit_cond = icmp eq float* %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:
  ret void
}

; External global holding a generic float*; it supplies the loop bound below.
@generic_end = external global float*

; Same loop shape as @loop, except that the bound %end is a generic pointer
; loaded from @generic_end. Checked only under the IR-WITH-LOOP prefix: the
; phi, load and increment gep must still become addrspace(3), while the exit
; comparison stays on generic float* operands, with %i2 cast back to float*
; for it.
define void @loop_with_generic_bound() {
; IR-WITH-LOOP-LABEL: @loop_with_generic_bound(
entry:
  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
  %end = load float*, float** @generic_end
  br label %loop

loop:
  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
; IR-WITH-LOOP: phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
  %v = load float, float* %i
; IR-WITH-LOOP: %v = load float, float addrspace(3)* %i
  call void @use(float %v)
  %i2 = getelementptr float, float* %i, i64 1
; IR-WITH-LOOP: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
  %exit_cond = icmp eq float* %i2, %end
; IR-WITH-LOOP: addrspacecast float addrspace(3)* %i2 to float*
; IR-WITH-LOOP: icmp eq float* %{{[0-9]+}}, %end
  br i1 %exit_cond, label %exit, label %loop

exit:
  ret void
}

; Barrier intrinsic called between the access groups of @ld_st_shared_f32; the
; checks there expect each call to be emitted as "bar.sync 0".
declare void @llvm.nvvm.barrier0() #3

; External function with no body in this file; the tests pass loaded values to
; it (presumably so that the loads have a user and are not removed).
declare void @use(float)

attributes #3 = { noduplicate nounwind }