xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/ds_write2.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s
2*9880d681SAndroid Build Coastguard Worker
3*9880d681SAndroid Build Coastguard Worker@lds = addrspace(3) global [512 x float] undef, align 4
4*9880d681SAndroid Build Coastguard Worker@lds.f64 = addrspace(3) global [512 x double] undef, align 8
5*9880d681SAndroid Build Coastguard Worker
6*9880d681SAndroid Build Coastguard Worker
; Stores one loaded float to @lds at the workitem's x id and again 8 elements
; later. The checks expect both stores to be merged into one ds_write2_b32
; (offset1:8) that uses the same value register for both data operands.
7*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_one_val_f32
8*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
9*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
10*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
11*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
12*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
13*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
14*9880d681SAndroid Build Coastguard Worker  %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
15*9880d681SAndroid Build Coastguard Worker  %val = load float, float addrspace(1)* %in.gep, align 4
  ; First store: @lds[x].
16*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
17*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx0, align 4
  ; Second store: @lds[x + 8], same value.
18*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
19*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
20*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx1, align 4
21*9880d681SAndroid Build Coastguard Worker  ret void
22*9880d681SAndroid Build Coastguard Worker}
23*9880d681SAndroid Build Coastguard Worker
; Loads two adjacent floats (volatile loads of %in[x] and %in[x + 1]) and
; stores them to @lds[x] and @lds[x + 8]. The checks expect a single
; ds_write2_b32 (offset1:8) carrying the two distinct value registers.
24*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_f32
25*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
26*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
27*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
28*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
29*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
30*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
31*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
32*9880d681SAndroid Build Coastguard Worker  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
33*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
34*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
35*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
36*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
37*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
38*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
39*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
40*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
41*9880d681SAndroid Build Coastguard Worker  ret void
42*9880d681SAndroid Build Coastguard Worker}
43*9880d681SAndroid Build Coastguard Worker
; Same store pattern as the two-value test (@lds[x] and @lds[x + 8]), but the
; FIRST LDS store is volatile. The checks expect no ds_write2_b32 ahead of two
; separate ds_write_b32 instructions, the second at byte offset 32 (8 floats).
44*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_f32_volatile_0
45*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_write2_b32
46*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
47*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
48*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
49*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
50*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
51*9880d681SAndroid Build Coastguard Worker  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
52*9880d681SAndroid Build Coastguard Worker  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
53*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
54*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
55*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  ; The volatile store is the one under test here.
56*9880d681SAndroid Build Coastguard Worker  store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
57*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
58*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
59*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
60*9880d681SAndroid Build Coastguard Worker  ret void
61*9880d681SAndroid Build Coastguard Worker}
62*9880d681SAndroid Build Coastguard Worker
; Counterpart of the volatile_0 test: here the SECOND LDS store (@lds[x + 8])
; is the volatile one. The checks again expect no ds_write2_b32 ahead of two
; separate ds_write_b32 instructions, the second at byte offset 32.
63*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_f32_volatile_1
64*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_write2_b32
65*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
66*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
67*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
68*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
69*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
70*9880d681SAndroid Build Coastguard Worker  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
71*9880d681SAndroid Build Coastguard Worker  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
72*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
73*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
74*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
75*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
76*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
77*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  ; The volatile store is the one under test here.
78*9880d681SAndroid Build Coastguard Worker  store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
79*9880d681SAndroid Build Coastguard Worker  ret void
80*9880d681SAndroid Build Coastguard Worker}
81*9880d681SAndroid Build Coastguard Worker
; The two stored floats are element 0 of one <2 x float> load and element 1 of
; a second <2 x float> load. The checks capture the first register of the first
; load's range and the last register of the second load's range, and expect
; both to feed one ds_write2_b32 (offset1:8).
82*9880d681SAndroid Build Coastguard Worker; 2 data subregisters from different super registers.
83*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
84*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
85*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
86*9880d681SAndroid Build Coastguard Worker; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
87*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
88*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
89*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
90*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
91*9880d681SAndroid Build Coastguard Worker  %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
92*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
93*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
94*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
  ; Pick one lane from each vector: lane 0 of %val0 and lane 1 of %val1.
95*9880d681SAndroid Build Coastguard Worker  %val0.0 = extractelement <2 x float> %val0, i32 0
96*9880d681SAndroid Build Coastguard Worker  %val1.1 = extractelement <2 x float> %val1, i32 1
97*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
98*9880d681SAndroid Build Coastguard Worker  store float %val0.0, float addrspace(3)* %arrayidx0, align 4
99*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
100*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
101*9880d681SAndroid Build Coastguard Worker  store float %val1.1, float addrspace(3)* %arrayidx1, align 4
102*9880d681SAndroid Build Coastguard Worker  ret void
103*9880d681SAndroid Build Coastguard Worker}
104*9880d681SAndroid Build Coastguard Worker
; Both stored floats come from a single <2 x float> load (elements 0 and 1).
; The checks capture the two registers of that load's range and expect them as
; the two data operands of one ds_write2_b32 (offset1:8).
105*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_subreg2_f32
106*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
107*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
108*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
109*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
110*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
111*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
112*9880d681SAndroid Build Coastguard Worker  %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
113*9880d681SAndroid Build Coastguard Worker  %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
114*9880d681SAndroid Build Coastguard Worker  %val0 = extractelement <2 x float> %val, i32 0
115*9880d681SAndroid Build Coastguard Worker  %val1 = extractelement <2 x float> %val, i32 1
116*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
117*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
118*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
119*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
120*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
121*9880d681SAndroid Build Coastguard Worker  ret void
122*9880d681SAndroid Build Coastguard Worker}
123*9880d681SAndroid Build Coastguard Worker
; Both stored floats come from a single <4 x float> load, using elements 0 and
; 3. The checks capture the first and last register of the dwordx4 range and
; expect them as the data operands of one ds_write2_b32 (offset1:8).
124*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_subreg4_f32
125*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
126*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
127*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
128*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
129*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
130*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
131*9880d681SAndroid Build Coastguard Worker  %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
132*9880d681SAndroid Build Coastguard Worker  %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
  ; First and last lanes of the 4-wide vector.
133*9880d681SAndroid Build Coastguard Worker  %val0 = extractelement <4 x float> %val, i32 0
134*9880d681SAndroid Build Coastguard Worker  %val1 = extractelement <4 x float> %val, i32 3
135*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
136*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
137*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
138*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
139*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
140*9880d681SAndroid Build Coastguard Worker  ret void
141*9880d681SAndroid Build Coastguard Worker}
142*9880d681SAndroid Build Coastguard Worker
; Two-value store where the second element is 255 floats past the first. The
; checks still expect a single ds_write2_b32, now with offset1:255.
; NOTE(review): presumably 255 is the largest offset1 the instruction can
; encode (the test name says "max offset") - confirm against the ISA docs.
143*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_max_offset_f32
144*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
145*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
146*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
147*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
148*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
149*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
150*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
151*9880d681SAndroid Build Coastguard Worker  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
152*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
153*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
154*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
155*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
156*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
157*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 255
158*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
159*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
160*9880d681SAndroid Build Coastguard Worker  ret void
161*9880d681SAndroid Build Coastguard Worker}
162*9880d681SAndroid Build Coastguard Worker
; Two-value store where the second element is 257 floats past the first. The
; checks expect two separate ds_write_b32 instructions, the second at byte
; offset 1028 (257 * 4), i.e. the pair is not merged into a ds_write2.
163*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_too_far_f32
164*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
165*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
166*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
167*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
168*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
169*9880d681SAndroid Build Coastguard Worker  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
170*9880d681SAndroid Build Coastguard Worker  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
171*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(1)* %in0.gep, align 4
172*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(1)* %in1.gep, align 4
173*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
174*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
175*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 257
176*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
177*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
178*9880d681SAndroid Build Coastguard Worker  ret void
179*9880d681SAndroid Build Coastguard Worker}
180*9880d681SAndroid Build Coastguard Worker
; Four stores off the same workitem index: %val0 goes to @lds[x + 0] and
; @lds[x + 11], %val1 goes to @lds[x + 8] and @lds[x + 27]. The checks expect
; two ds_write2_b32 instructions, each pairing the two stores of one value:
; (offset1:11) for the first value and (offset0:8 offset1:27) for the second.
; NOTE(review): the second check line binds BASEADDR again instead of reusing
; the first binding, so it does not verify that both writes share one base
; register - confirm whether that looseness is intended.
181*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_f32_x2
182*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset1:11
183*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27
184*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
185*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
186*9880d681SAndroid Build Coastguard Worker  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
187*9880d681SAndroid Build Coastguard Worker  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
188*9880d681SAndroid Build Coastguard Worker  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
189*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(1)* %in0.gep, align 4
190*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(1)* %in1.gep, align 4
191*9880d681SAndroid Build Coastguard Worker
192*9880d681SAndroid Build Coastguard Worker  %idx.0 = add nsw i32 %tid.x, 0
193*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
194*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
195*9880d681SAndroid Build Coastguard Worker
196*9880d681SAndroid Build Coastguard Worker  %idx.1 = add nsw i32 %tid.x, 8
197*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
198*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
199*9880d681SAndroid Build Coastguard Worker
200*9880d681SAndroid Build Coastguard Worker  %idx.2 = add nsw i32 %tid.x, 11
201*9880d681SAndroid Build Coastguard Worker  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
202*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx2, align 4
203*9880d681SAndroid Build Coastguard Worker
204*9880d681SAndroid Build Coastguard Worker  %idx.3 = add nsw i32 %tid.x, 27
205*9880d681SAndroid Build Coastguard Worker  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
206*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx3, align 4
207*9880d681SAndroid Build Coastguard Worker
208*9880d681SAndroid Build Coastguard Worker  ret void
209*9880d681SAndroid Build Coastguard Worker}
210*9880d681SAndroid Build Coastguard Worker
; Variant of the x2 test in which the first store is at @lds[x + 3] rather
; than @lds[x + 0]: %val0 goes to x + 3 and x + 11, %val1 to x + 8 and x + 27.
; The checks expect two ds_write2_b32 instructions with both offsets non-zero:
; (offset0:3 offset1:11) and (offset0:8 offset1:27).
; NOTE(review): the second check line binds BASEADDR again instead of reusing
; the first binding, so it does not verify that both writes share one base
; register - confirm whether that looseness is intended.
211*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
212*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset0:3 offset1:11
213*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27
214*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
215*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
216*9880d681SAndroid Build Coastguard Worker  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
217*9880d681SAndroid Build Coastguard Worker  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
218*9880d681SAndroid Build Coastguard Worker  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
219*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(1)* %in0.gep, align 4
220*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(1)* %in1.gep, align 4
221*9880d681SAndroid Build Coastguard Worker
222*9880d681SAndroid Build Coastguard Worker  %idx.0 = add nsw i32 %tid.x, 3
223*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
224*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx0, align 4
225*9880d681SAndroid Build Coastguard Worker
226*9880d681SAndroid Build Coastguard Worker  %idx.1 = add nsw i32 %tid.x, 8
227*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
228*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx1, align 4
229*9880d681SAndroid Build Coastguard Worker
230*9880d681SAndroid Build Coastguard Worker  %idx.2 = add nsw i32 %tid.x, 11
231*9880d681SAndroid Build Coastguard Worker  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
232*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %arrayidx2, align 4
233*9880d681SAndroid Build Coastguard Worker
234*9880d681SAndroid Build Coastguard Worker  %idx.3 = add nsw i32 %tid.x, 27
235*9880d681SAndroid Build Coastguard Worker  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
236*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %arrayidx3, align 4
237*9880d681SAndroid Build Coastguard Worker
238*9880d681SAndroid Build Coastguard Worker  ret void
239*9880d681SAndroid Build Coastguard Worker}
240*9880d681SAndroid Build Coastguard Worker
; The two LDS store addresses are the two lanes of a <2 x float addrspace(3)*>
; kernel argument after a vector getelementptr, so they are unrelated pointers
; rather than one base plus two offsets. The checks expect no ds_write2_b32
; ahead of two separate ds_write_b32 instructions.
241*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
242*9880d681SAndroid Build Coastguard Worker; SI-NOT: ds_write2_b32
243*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32
244*9880d681SAndroid Build Coastguard Worker; SI: ds_write_b32
245*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
246*9880d681SAndroid Build Coastguard Workerdefine void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
247*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
248*9880d681SAndroid Build Coastguard Worker  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
249*9880d681SAndroid Build Coastguard Worker  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
250*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float addrspace(1)* %in0.gep, align 4
251*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float addrspace(1)* %in1.gep, align 4
252*9880d681SAndroid Build Coastguard Worker
  ; NOTE(review): both insertelements below write lane 0, so the constant 8
  ; replaces %x.i and lane 1 of the index vector stays undef - confirm this is
  ; intended before changing it, since the expected output depends on it.
253*9880d681SAndroid Build Coastguard Worker  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
254*9880d681SAndroid Build Coastguard Worker  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
255*9880d681SAndroid Build Coastguard Worker  %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
256*9880d681SAndroid Build Coastguard Worker  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
257*9880d681SAndroid Build Coastguard Worker  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
258*9880d681SAndroid Build Coastguard Worker
259*9880d681SAndroid Build Coastguard Worker  ; Apply an additional offset after the vector that will be more obviously folded.
260*9880d681SAndroid Build Coastguard Worker  %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
261*9880d681SAndroid Build Coastguard Worker  store float %val0, float addrspace(3)* %gep.0, align 4
262*9880d681SAndroid Build Coastguard Worker
  ; %add.x is not referenced by any later instruction in this function.
263*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
264*9880d681SAndroid Build Coastguard Worker  store float %val1, float addrspace(3)* %gep.1.offset, align 4
265*9880d681SAndroid Build Coastguard Worker  ret void
266*9880d681SAndroid Build Coastguard Worker}
267*9880d681SAndroid Build Coastguard Worker
; 64-bit version of the one-value test: a single loaded double is stored to
; @lds.f64[x] and @lds.f64[x + 8], both 8-byte aligned. The checks expect the
; index to be scaled by a shift of 3 and one ds_write2_b64 (offset1:8) that
; uses the same 64-bit register pair for both data operands.
268*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_one_val_f64
269*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
270*9880d681SAndroid Build Coastguard Worker; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
271*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
272*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
273*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
274*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
275*9880d681SAndroid Build Coastguard Worker  %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
276*9880d681SAndroid Build Coastguard Worker  %val = load double, double addrspace(1)* %in.gep, align 8
277*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
278*9880d681SAndroid Build Coastguard Worker  store double %val, double addrspace(3)* %arrayidx0, align 8
279*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
280*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
281*9880d681SAndroid Build Coastguard Worker  store double %val, double addrspace(3)* %arrayidx1, align 8
282*9880d681SAndroid Build Coastguard Worker  ret void
283*9880d681SAndroid Build Coastguard Worker}
284*9880d681SAndroid Build Coastguard Worker
; Stores one loaded double twice through the %lds pointer argument (elements x
; and x + 7), but with only 4-byte alignment on the stores. The checks expect
; each 64-bit store to be emitted as a ds_write2_b32 of the value's low and
; high dwords: offsets 0/1 for the first store and 14/15 for the second
; (7 doubles = 14 dwords).
285*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @misaligned_simple_write2_one_val_f64
286*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
287*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
288*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
289*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
290*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
291*9880d681SAndroid Build Coastguard Workerdefine void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
292*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
293*9880d681SAndroid Build Coastguard Worker  %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
294*9880d681SAndroid Build Coastguard Worker  %val = load double, double addrspace(1)* %in.gep, align 8
295*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  ; align 4 on a double store is the "misaligned" part of this test.
296*9880d681SAndroid Build Coastguard Worker  store double %val, double addrspace(3)* %arrayidx0, align 4
297*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 7
298*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
299*9880d681SAndroid Build Coastguard Worker  store double %val, double addrspace(3)* %arrayidx1, align 4
300*9880d681SAndroid Build Coastguard Worker  ret void
301*9880d681SAndroid Build Coastguard Worker}
302*9880d681SAndroid Build Coastguard Worker
; 64-bit version of the two-value test: two adjacent doubles (volatile loads of
; %in[x] and %in[x + 1], the second at byte offset 8) are stored to
; @lds.f64[x] and @lds.f64[x + 8]. The checks expect one ds_write2_b64
; (offset1:8) carrying the two distinct 64-bit register pairs.
303*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @simple_write2_two_val_f64
304*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
305*9880d681SAndroid Build Coastguard Worker; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
306*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
307*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
308*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
309*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
310*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
311*9880d681SAndroid Build Coastguard Worker  %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
312*9880d681SAndroid Build Coastguard Worker  %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
313*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile double, double addrspace(1)* %in.gep.0, align 8
314*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile double, double addrspace(1)* %in.gep.1, align 8
315*9880d681SAndroid Build Coastguard Worker  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
316*9880d681SAndroid Build Coastguard Worker  store double %val0, double addrspace(3)* %arrayidx0, align 8
317*9880d681SAndroid Build Coastguard Worker  %add.x = add nsw i32 %x.i, 8
318*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
319*9880d681SAndroid Build Coastguard Worker  store double %val1, double addrspace(3)* %arrayidx1, align 8
320*9880d681SAndroid Build Coastguard Worker  ret void
321*9880d681SAndroid Build Coastguard Worker}
322*9880d681SAndroid Build Coastguard Worker
323*9880d681SAndroid Build Coastguard Worker@foo = addrspace(3) global [4 x i32] undef, align 4
324*9880d681SAndroid Build Coastguard Worker
; Stores the constant 123 to elements 0 and 1 of @foo using constant-expression
; addresses (no workitem id involved). The checks expect a register holding 0
; as the address operand of one ds_write2_b32 with offset1:1.
325*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @store_constant_adjacent_offsets
326*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
327*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
328*9880d681SAndroid Build Coastguard Workerdefine void @store_constant_adjacent_offsets() {
329*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
330*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
331*9880d681SAndroid Build Coastguard Worker  ret void
332*9880d681SAndroid Build Coastguard Worker}
333*9880d681SAndroid Build Coastguard Worker
; Stores the constant 123 to elements 0 and 2 of @foo, i.e. two non-adjacent
; slots. The expected output materializes 0x7b (123) once and a zero base
; address (in either order), then issues one ds_write2_b32 that uses the same
; value register for both data operands with offset1:2.
334*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @store_constant_disjoint_offsets
335*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
336*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
337*9880d681SAndroid Build Coastguard Worker; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
338*9880d681SAndroid Build Coastguard Workerdefine void @store_constant_disjoint_offsets() {
339*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
340*9880d681SAndroid Build Coastguard Worker  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
341*9880d681SAndroid Build Coastguard Worker  ret void
342*9880d681SAndroid Build Coastguard Worker}
343*9880d681SAndroid Build Coastguard Worker
; Four-element i64 array in address space 3 that is only 4-byte aligned, so
; its 8-byte elements are under-aligned.
344*9880d681SAndroid Build Coastguard Worker@bar = addrspace(3) global [4 x i64] undef, align 4
345*9880d681SAndroid Build Coastguard Worker
; Stores two i64 constants to elements 0 and 1 of @bar with align 4. The
; expected output is a zero base register and two ds_write2_b32 instructions
; (accepted in either order): one with offset1:1 and one with offset0:2
; offset1:3, which together cover four consecutive 32-bit slots.
346*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @store_misaligned64_constant_offsets
347*9880d681SAndroid Build Coastguard Worker; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
348*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
349*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
350*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
351*9880d681SAndroid Build Coastguard Workerdefine void @store_misaligned64_constant_offsets() {
352*9880d681SAndroid Build Coastguard Worker  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
353*9880d681SAndroid Build Coastguard Worker  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
354*9880d681SAndroid Build Coastguard Worker  ret void
355*9880d681SAndroid Build Coastguard Worker}
356*9880d681SAndroid Build Coastguard Worker
; 4096-element i64 array in address space 3, again only 4-byte aligned.
357*9880d681SAndroid Build Coastguard Worker@bar.large = addrspace(3) global [4096 x i64] undef, align 4
358*9880d681SAndroid Build Coastguard Worker
; Stores two i64 constants with align 4 to elements 2048 and 4095 of
; @bar.large. Their byte offsets are 2048 * 8 = 0x4000 and 4095 * 8 = 0x7ff8,
; which are exactly the two constants the checks expect to be moved into base
; registers. Each base is then expected to feed its own ds_write2_b32 with
; offset1:1; the four expected instructions may appear in any order.
; NOTE(review): presumably these byte offsets are too large for the
; instruction's offset fields, which is why a separate base register is
; expected per store -- confirm against the ISA documentation.
359*9880d681SAndroid Build Coastguard Worker; SI-LABEL: @store_misaligned64_constant_large_offsets
360*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
361*9880d681SAndroid Build Coastguard Worker; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
362*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
363*9880d681SAndroid Build Coastguard Worker; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
364*9880d681SAndroid Build Coastguard Worker; SI: s_endpgm
365*9880d681SAndroid Build Coastguard Workerdefine void @store_misaligned64_constant_large_offsets() {
366*9880d681SAndroid Build Coastguard Worker  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
367*9880d681SAndroid Build Coastguard Worker  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
368*9880d681SAndroid Build Coastguard Worker  ret void
369*9880d681SAndroid Build Coastguard Worker}
370*9880d681SAndroid Build Coastguard Worker
; Two float arrays in address space 3 (264 and 776 elements) written by
; write2_sgemm_sequence.
371*9880d681SAndroid Build Coastguard Worker@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
372*9880d681SAndroid Build Coastguard Worker@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
373*9880d681SAndroid Build Coastguard Worker
; Loads one float from %in and stores it ten times: to @sgemm.lA at indices
; x + {0, 1, 16, 17} and to @sgemm.lB at indices y + {0, 1, 32, 33, 64, 65}.
; The %C, %lda and %ldb arguments are not used in the body.
; NOTE(review): no FileCheck directives are attached to this function, so it
; appears to verify only that compilation succeeds under the options on the
; RUN line at the top of the file -- confirm that this is intentional.
374*9880d681SAndroid Build Coastguard Workerdefine void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
  ; NOTE(review): %x.i is the workgroup id here, whereas %y.i is a workitem
  ; id -- confirm the mix is deliberate.
375*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
376*9880d681SAndroid Build Coastguard Worker  %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
377*9880d681SAndroid Build Coastguard Worker  %val = load float, float addrspace(1)* %in
  ; Stores into @sgemm.lA: pairs of adjacent elements at x + 0/1 and x + 16/17.
378*9880d681SAndroid Build Coastguard Worker  %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
379*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx44, align 4
380*9880d681SAndroid Build Coastguard Worker  %add47 = add nsw i32 %x.i, 1
381*9880d681SAndroid Build Coastguard Worker  %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
382*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx48, align 4
383*9880d681SAndroid Build Coastguard Worker  %add51 = add nsw i32 %x.i, 16
384*9880d681SAndroid Build Coastguard Worker  %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
385*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx52, align 4
386*9880d681SAndroid Build Coastguard Worker  %add55 = add nsw i32 %x.i, 17
387*9880d681SAndroid Build Coastguard Worker  %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
388*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx56, align 4
  ; Stores into @sgemm.lB: adjacent pairs at y + 0/1, y + 32/33 and y + 64/65.
389*9880d681SAndroid Build Coastguard Worker  %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
390*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx60, align 4
391*9880d681SAndroid Build Coastguard Worker  %add63 = add nsw i32 %y.i, 1
392*9880d681SAndroid Build Coastguard Worker  %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
393*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx64, align 4
394*9880d681SAndroid Build Coastguard Worker  %add67 = add nsw i32 %y.i, 32
395*9880d681SAndroid Build Coastguard Worker  %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
396*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx68, align 4
397*9880d681SAndroid Build Coastguard Worker  %add71 = add nsw i32 %y.i, 33
398*9880d681SAndroid Build Coastguard Worker  %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
399*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx72, align 4
400*9880d681SAndroid Build Coastguard Worker  %add75 = add nsw i32 %y.i, 64
401*9880d681SAndroid Build Coastguard Worker  %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
402*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx76, align 4
403*9880d681SAndroid Build Coastguard Worker  %add79 = add nsw i32 %y.i, 65
404*9880d681SAndroid Build Coastguard Worker  %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
405*9880d681SAndroid Build Coastguard Worker  store float %val, float addrspace(3)* %arrayidx80, align 4
406*9880d681SAndroid Build Coastguard Worker  ret void
407*9880d681SAndroid Build Coastguard Worker}
408*9880d681SAndroid Build Coastguard Worker
; Loads a <4 x float> from %in and stores it, with align 4, to element x of
; the address-space-3 pointer %out. The directives below describe two
; ds_write2_b32 instructions (offset0:3 offset1:2, then offset0:1) followed by
; s_endpgm.
; NOTE(review): these directives use the CI prefix, while the only RUN line
; visible at the top of the file passes -check-prefix=SI, so FileCheck appears
; never to evaluate them. Confirm, then either enable the CI prefix on the RUN
; line or rename the directives after re-validating the expected text against
; current llc output.
409*9880d681SAndroid Build Coastguard Worker; CI-LABEL: {{^}}simple_write2_v4f32_superreg_align4:
410*9880d681SAndroid Build Coastguard Worker; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:2{{$}}
411*9880d681SAndroid Build Coastguard Worker; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:1{{$}}
412*9880d681SAndroid Build Coastguard Worker; CI: s_endpgm
413*9880d681SAndroid Build Coastguard Workerdefine void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 {
414*9880d681SAndroid Build Coastguard Worker  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  ; This getelementptr has no index operands, so %in.gep is the same address
  ; as %in; every lane therefore loads the same vector.
415*9880d681SAndroid Build Coastguard Worker  %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in
416*9880d681SAndroid Build Coastguard Worker  %val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4
  ; The 16-byte vector store is only 4-byte aligned.
417*9880d681SAndroid Build Coastguard Worker  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %out, i32 %x.i
418*9880d681SAndroid Build Coastguard Worker  store <4 x float> %val0, <4 x float> addrspace(3)* %out.gep, align 4
419*9880d681SAndroid Build Coastguard Worker  ret void
420*9880d681SAndroid Build Coastguard Worker}
421*9880d681SAndroid Build Coastguard Worker
; Declarations of the AMDGPU id intrinsics called by the tests in this file,
; followed by the attribute groups referenced as #0, #1 and #2.
; NOTE(review): within the portion of the file reviewed here,
; @llvm.amdgcn.workgroup.id.y is never called and attribute group #2 is never
; referenced -- check the rest of the file before removing either.
422*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
423*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workgroup.id.x() #1
424*9880d681SAndroid Build Coastguard Worker
425*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
426*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workgroup.id.y() #1
427*9880d681SAndroid Build Coastguard Worker
428*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
429*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workitem.id.x() #1
430*9880d681SAndroid Build Coastguard Worker
431*9880d681SAndroid Build Coastguard Worker; Function Attrs: nounwind readnone
432*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workitem.id.y() #1
433*9880d681SAndroid Build Coastguard Worker
; #0 is attached to the test kernels, #1 to the intrinsic declarations and
; their call sites.
434*9880d681SAndroid Build Coastguard Workerattributes #0 = { nounwind }
435*9880d681SAndroid Build Coastguard Workerattributes #1 = { nounwind readnone }
436*9880d681SAndroid Build Coastguard Workerattributes #2 = { convergent nounwind }
437