xref: /aosp_15_r20/external/llvm/test/CodeGen/NVPTX/vector-loads.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
; Even though general vector types are not supported in PTX, we can still
; optimize loads/stores with pseudo-vector instructions of the form:
;
; ld.v2.f32 {%f0, %f1}, [%r0]
;
; which will load two floats at once into scalar registers.
9*9880d681SAndroid Build Coastguard Worker
; foo: load a <2 x float> through %a, square it element-wise, store it back.
; The test expects the load to be emitted as one 2-wide PTX vector load whose
; destinations are scalar float registers.
;
; The function anchor is a label directive ending in "(" so that it matches
; only this function's header (every other function name in this file also
; starts with "foo") and so that the load check below cannot be satisfied by
; the output of a later function.
define void @foo(<2 x float>* %a) {
; CHECK-LABEL: .func foo(
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
  %t1 = load <2 x float>, <2 x float>* %a
  ; Use the loaded value so the load is not trivially dead.
  %t2 = fmul <2 x float> %t1, %t1
  store <2 x float> %t2, <2 x float>* %a
  ret void
}
18*9880d681SAndroid Build Coastguard Worker
; foo2: load a <4 x float> through %a, square it element-wise, store it back.
; The test expects the load to be emitted as one 4-wide PTX vector load whose
; destinations are scalar float registers.
;
; The function anchor is a label directive so the load check is confined to
; this function's output; with a plain anchor the same 4-wide float load
; pattern could instead be matched inside @foo3, which emits identical loads.
define void @foo2(<4 x float>* %a) {
; CHECK-LABEL: .func foo2(
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
  %t1 = load <4 x float>, <4 x float>* %a
  ; Use the loaded value so the load is not trivially dead.
  %t2 = fmul <4 x float> %t1, %t1
  store <4 x float> %t2, <4 x float>* %a
  ret void
}
27*9880d681SAndroid Build Coastguard Worker
; foo3: load an <8 x float> through %a, square it element-wise, store it back.
; The test expects the 8-element load to be split into two 4-wide PTX vector
; loads that appear on consecutive output lines.
;
; The function anchor is a label directive ending in "(" so the two load
; checks are confined to this function's output.
define void @foo3(<8 x float>* %a) {
; CHECK-LABEL: .func foo3(
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
  %t1 = load <8 x float>, <8 x float>* %a
  ; Use the loaded value so the load is not trivially dead.
  %t2 = fmul <8 x float> %t1, %t1
  store <8 x float> %t2, <8 x float>* %a
  ret void
}
37*9880d681SAndroid Build Coastguard Worker
38*9880d681SAndroid Build Coastguard Worker
39*9880d681SAndroid Build Coastguard Worker
; foo4: load a <2 x i32> through %a, square it element-wise, store it back.
; Integer counterpart of @foo: the test expects one 2-wide PTX vector load
; whose destinations are scalar 32-bit integer registers.
;
; The function anchor is a label directive ending in "(" so the load check is
; confined to this function's output.
define void @foo4(<2 x i32>* %a) {
; CHECK-LABEL: .func foo4(
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
  %t1 = load <2 x i32>, <2 x i32>* %a
  ; Use the loaded value so the load is not trivially dead.
  %t2 = mul <2 x i32> %t1, %t1
  store <2 x i32> %t2, <2 x i32>* %a
  ret void
}
48*9880d681SAndroid Build Coastguard Worker
; foo5: load a <4 x i32> through %a, square it element-wise, store it back.
; Integer counterpart of @foo2: the test expects one 4-wide PTX vector load
; whose destinations are scalar 32-bit integer registers.
;
; The function anchor is a label directive so the load check is confined to
; this function's output; with a plain anchor the same 4-wide integer load
; pattern could instead be matched inside @foo6, which emits identical loads.
define void @foo5(<4 x i32>* %a) {
; CHECK-LABEL: .func foo5(
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
  %t1 = load <4 x i32>, <4 x i32>* %a
  ; Use the loaded value so the load is not trivially dead.
  %t2 = mul <4 x i32> %t1, %t1
  store <4 x i32> %t2, <4 x i32>* %a
  ret void
}
57*9880d681SAndroid Build Coastguard Worker
; foo6: load an <8 x i32> through %a, square it element-wise, store it back.
; Integer counterpart of @foo3: the test expects the 8-element load to be
; split into two 4-wide PTX vector loads on consecutive output lines.
;
; The function anchor is a label directive ending in "(" so the two load
; checks are confined to this function's output.
define void @foo6(<8 x i32>* %a) {
; CHECK-LABEL: .func foo6(
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
  %t1 = load <8 x i32>, <8 x i32>* %a
  ; Use the loaded value so the load is not trivially dead.
  %t2 = mul <8 x i32> %t1, %t1
  store <8 x i32> %t2, <8 x i32>* %a
  ret void
}
67