; xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/smrd.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
;
; Checks how constant-address-space scalar loads (s_load_dword and
; s_buffer_load_dword) select their offset operand on three targets.
; Check prefixes: SI, CI and VI are specific to one llc invocation each,
; SICI is shared by the SI and bonaire invocations, SIVI by the SI and tonga
; invocations, and GCN by all three.
; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SICI --check-prefix=SIVI %s
; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN --check-prefix=SICI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s

; SMRD load with an immediate offset.
; The pointer is advanced by one i32 element; the SICI check expects the
; offset operand 0x1 and the VI check expects 0x4.
; NOTE(review): the SICI line is only exercised when a FileCheck invocation
; passes --check-prefix=SICI.
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
  %elt.ptr = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
  %val = load i32, i32 addrspace(2)* %elt.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; SMRD load with the largest possible immediate offset.
; The pointer is advanced by 255 i32 elements; the SICI check expects the
; offset operand 0xff and the VI check expects 0x3fc.
; NOTE(review): the SICI line is only exercised when a FileCheck invocation
; passes --check-prefix=SICI.
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
  %elt.ptr = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
  %val = load i32, i32 addrspace(2)* %elt.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; SMRD load with an offset greater than the largest possible immediate.
; The pointer is advanced by 256 i32 elements. The SI checks expect the value
; 0x400 to be materialized in an SGPR that is then used as the offset operand;
; the CI check expects the operand 0x100 and the VI check expects 0x400.
; GCN-LABEL: {{^}}smrd2:
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
  %elt.ptr = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
  %val = load i32, i32 addrspace(2)* %elt.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; SMRD load with a 64-bit offset
; The element index is 2^32, so the offset does not fit in 32 bits.
; Only the SI run has an instruction check here, and it matches an
; s_load_dwordx2 with offset 0xb (presumably the load of a pointer kernel
; argument - confirm); the offset computation itself is not checked.
; GCN-LABEL: {{^}}smrd3:
; FIXME: There are too many copies here because we don't fold immediates
;        through REG_SEQUENCE
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; GCN: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
  %elt.ptr = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
  %val = load i32, i32 addrspace(2)* %elt.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; SMRD load with the largest possible immediate offset on VI
; The pointer is advanced by 262143 i32 elements. The SI checks expect 0xffffc
; to be moved into an SGPR that is used as the offset operand; the CI check
; expects the operand 0x3ffff and the VI check expects 0xffffc.
; GCN-LABEL: {{^}}smrd4:
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
  %elt.ptr = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
  %val = load i32, i32 addrspace(2)* %elt.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; SMRD load with an offset greater than the largest possible immediate on VI
; The pointer is advanced by 262144 i32 elements. The SIVI checks expect
; 0x100000 to be moved into an SGPR that is used as the offset operand; the
; CI check expects the operand 0x40000.
; GCN-LABEL: {{^}}smrd5:
; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
  %elt.ptr = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
  %val = load i32, i32 addrspace(2)* %elt.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; SMRD load using the load.const intrinsic with an immediate offset
; llvm.SI.load.const is called with offset 16; the SICI check expects the
; offset operand 0x4 and the VI check expects 0x10.
; NOTE(review): the SICI line is only exercised when a FileCheck invocation
; passes --check-prefix=SICI.
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
  ; %0 is the first (unnamed) argument; the <16 x i8> loaded through it is the
  ; first operand of llvm.SI.load.const.
  %desc.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
  %desc = load <16 x i8>, <16 x i8> addrspace(2)* %desc.ptr
  %val = call float @llvm.SI.load.const(<16 x i8> %desc, i32 16)
  ; The loaded value is passed as all four float operands of llvm.SI.export.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %val, float %val, float %val, float %val)
  ret void
}

; SMRD load using the load.const intrinsic with the largest possible immediate
; offset.
; llvm.SI.load.const is called with offset 1020; the SICI check expects the
; offset operand 0xff and the VI check expects 0x3fc.
; NOTE(review): the SICI line is only exercised when a FileCheck invocation
; passes --check-prefix=SICI.
; GCN-LABEL: {{^}}smrd_load_const1:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
  ; %0 is the first (unnamed) argument; the <16 x i8> loaded through it is the
  ; first operand of llvm.SI.load.const.
  %desc.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
  %desc = load <16 x i8>, <16 x i8> addrspace(2)* %desc.ptr
  %val = call float @llvm.SI.load.const(<16 x i8> %desc, i32 1020)
  ; The loaded value is passed as all four float operands of llvm.SI.export.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %val, float %val, float %val, float %val)
  ret void
}

; SMRD load using the load.const intrinsic with an offset greater than the
; largest possible immediate offset.
; llvm.SI.load.const is called with offset 1024. The SI checks expect 0x400 to
; be materialized in an SGPR that is then used as the offset operand; the CI
; check expects the operand 0x100 and the VI check expects 0x400.
; GCN-LABEL: {{^}}smrd_load_const2:
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
  ; %0 is the first (unnamed) argument; the <16 x i8> loaded through it is the
  ; first operand of llvm.SI.load.const.
  %desc.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
  %desc = load <16 x i8>, <16 x i8> addrspace(2)* %desc.ptr
  %val = call float @llvm.SI.load.const(<16 x i8> %desc, i32 1024)
  ; The loaded value is passed as all four float operands of llvm.SI.export.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %val, float %val, float %val, float %val)
  ret void
}

; SMRD load with the largest possible immediate offset on VI
; llvm.SI.load.const is called with offset 1048572. The SI checks expect
; 0xffffc to be moved into an SGPR that is used as the offset operand; the CI
; check expects the operand 0x3ffff and the VI check expects 0xffffc.
; GCN-LABEL: {{^}}smrd_load_const3:
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
  ; %0 is the first (unnamed) argument; the <16 x i8> loaded through it is the
  ; first operand of llvm.SI.load.const.
  %desc.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
  %desc = load <16 x i8>, <16 x i8> addrspace(2)* %desc.ptr
  %val = call float @llvm.SI.load.const(<16 x i8> %desc, i32 1048572)
  ; The loaded value is passed as all four float operands of llvm.SI.export.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %val, float %val, float %val, float %val)
  ret void
}

; SMRD load with an offset greater than the largest possible immediate on VI
; llvm.SI.load.const is called with offset 1048576. The SIVI checks expect
; 0x100000 to be moved into an SGPR that is used as the offset operand; the
; CI check expects the operand 0x40000.
; GCN-LABEL: {{^}}smrd_load_const4:
; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
main_body:
  ; %0 is the first (unnamed) argument; the <16 x i8> loaded through it is the
  ; first operand of llvm.SI.load.const.
  %desc.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
  %desc = load <16 x i8>, <16 x i8> addrspace(2)* %desc.ptr
  %val = call float @llvm.SI.load.const(<16 x i8> %desc, i32 1048576)
  ; The loaded value is passed as all four float operands of llvm.SI.export.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %val, float %val, float %val, float %val)
  ret void
}

; Intrinsic declarations used by the smrd_load_const* functions.

; Takes a <16 x i8> value and an i32 offset and returns a float.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0

; Takes five i32 operands followed by four float operands; returns nothing.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Attribute group #0 is applied to llvm.SI.load.const above.
attributes #0 = { nounwind readnone }