; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s

; Codegen tests for combining pairs of addrspace(3) stores into
; ds_write2_b32 / ds_write2_b64 on amdgcn (bonaire) with +load-store-opt.
; Each function stores to two (or four) addresses derived from the workitem
; id; the FileCheck directives above each function pin whether the stores are
; expected to be emitted as a combined ds_write2 or as separate ds_write ops.

@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8


; The same loaded value is stored at index x and x+8; one ds_write2_b32 with
; offset1:8 is expected.
; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %val = load float, float addrspace(1)* %in.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Two distinct loaded values stored at index x and x+8; one ds_write2_b32
; carrying both values is expected.
; SI-LABEL: @simple_write2_two_val_f32
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
  %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The first store is volatile; no ds_write2_b32 may appear and two separate
; ds_write_b32 instructions are expected.
; SI-LABEL: @simple_write2_two_val_f32_volatile_0
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
  %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The second store is volatile; no ds_write2_b32 may appear and two separate
; ds_write_b32 instructions are expected.
; SI-LABEL: @simple_write2_two_val_f32_volatile_1
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
  %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; 2 data subregisters from different super registers.
; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
  %val1 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
  %val0.0 = extractelement <2 x float> %val0, i32 0
  %val1.1 = extractelement <2 x float> %val1, i32 1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0.0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1.1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Both stored values are elements 0 and 1 of a single <2 x float> load.
; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
  %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
  %val0 = extractelement <2 x float> %val, i32 0
  %val1 = extractelement <2 x float> %val, i32 1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Both stored values are elements 0 and 3 of a single <4 x float> load.
; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
  %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
  %val0 = extractelement <4 x float> %val, i32 0
  %val1 = extractelement <4 x float> %val, i32 3
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The second store is 255 elements after the first; a ds_write2_b32 with
; offset1:255 is still expected.
; SI-LABEL: @simple_write2_two_val_max_offset_f32
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
  %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 255
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The second store is 257 elements (byte offset 1028) after the first; two
; separate ds_write_b32 instructions are expected.
; SI-LABEL: @simple_write2_two_val_too_far_f32
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: s_endpgm
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 257
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Four stores (indices +0, +8, +11, +27) expected to become two ds_write2_b32.
; The second directive reuses the captured base register instead of
; redefining it, so both writes are verified to share the same base address.
; SI-LABEL: @simple_write2_two_val_f32_x2
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset1:11
; SI: ds_write2_b32 [[BASEADDR]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4

  %idx.0 = add nsw i32 %tid.x, 0
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
  store float %val0, float addrspace(3)* %arrayidx0, align 4

  %idx.1 = add nsw i32 %tid.x, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
  store float %val1, float addrspace(3)* %arrayidx1, align 4

  %idx.2 = add nsw i32 %tid.x, 11
  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
  store float %val0, float addrspace(3)* %arrayidx2, align 4

  %idx.3 = add nsw i32 %tid.x, 27
  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
  store float %val1, float addrspace(3)* %arrayidx3, align 4

  ret void
}

; Same as above, but the first store is at index +3 so offset0 is non-zero in
; both expected ds_write2_b32 instructions. The base register capture is
; reused in the second directive so the shared base is actually verified.
; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset0:3 offset1:11
; SI: ds_write2_b32 [[BASEADDR]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4

  %idx.0 = add nsw i32 %tid.x, 3
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
  store float %val0, float addrspace(3)* %arrayidx0, align 4

  %idx.1 = add nsw i32 %tid.x, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
  store float %val1, float addrspace(3)* %arrayidx1, align 4

  %idx.2 = add nsw i32 %tid.x, 11
  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
  store float %val0, float addrspace(3)* %arrayidx2, align 4

  %idx.3 = add nsw i32 %tid.x, 27
  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
  store float %val1, float addrspace(3)* %arrayidx3, align 4

  ret void
}

; The two store addresses are lanes of a vector-of-pointers GEP on the
; %lds.ptr argument; no ds_write2_b32 may appear here.
; NOTE(review): both insertelement instructions below write lane 0 (lane 1 of
; the index vector stays undef) and %add.x is never used - confirm this is
; intentional before changing it, since the expected output depends on it.
; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
; SI-NOT: ds_write2_b32
; SI: ds_write_b32
; SI: ds_write_b32
; SI: s_endpgm
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4

  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
  %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1

  ; Apply an additional offset after the vector that will be more obviously folded.
  %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
  store float %val0, float addrspace(3)* %gep.0, align 4

  %add.x = add nsw i32 %x.i, 8
  store float %val1, float addrspace(3)* %gep.1.offset, align 4
  ret void
}

; 64-bit variant: the same double is stored at index x and x+8; one
; ds_write2_b64 is expected.
; SI-LABEL: @simple_write2_one_val_f64
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %val = load double, double addrspace(1)* %in.gep, align 8
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  store double %val, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
  store double %val, double addrspace(3)* %arrayidx1, align 8
  ret void
}

; The double stores are only 4-byte aligned; each is expected to be emitted
; as a ds_write2_b32 of the two 32-bit halves.
; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %val = load double, double addrspace(1)* %in.gep, align 8
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  store double %val, double addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 7
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  store double %val, double addrspace(3)* %arrayidx1, align 4
  ret void
}

; 64-bit variant with two distinct loaded values; one ds_write2_b64 carrying
; both values is expected.
; SI-LABEL: @simple_write2_two_val_f64
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile double, double addrspace(1)* %in.gep.0, align 8
  %val1 = load volatile double, double addrspace(1)* %in.gep.1, align 8
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  store double %val0, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
  store double %val1, double addrspace(3)* %arrayidx1, align 8
  ret void
}

@foo = addrspace(3) global [4 x i32] undef, align 4

; Constant stores to elements 0 and 1 of @foo; one ds_write2_b32 with
; offset1:1 is expected.
; SI-LABEL: @store_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
define void @store_constant_adjacent_offsets() {
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
  ret void
}

; Constant stores to elements 0 and 2 of @foo; one ds_write2_b32 with
; offset1:2 using a single materialized value (0x7b == 123) is expected.
; SI-LABEL: @store_constant_disjoint_offsets
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
define void @store_constant_disjoint_offsets() {
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
  ret void
}

@bar = addrspace(3) global [4 x i64] undef, align 4

; 4-byte-aligned i64 constant stores to elements 0 and 1 of @bar; each is
; expected as a ds_write2_b32 pair off the same zero base.
; SI-LABEL: @store_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; SI: s_endpgm
define void @store_misaligned64_constant_offsets() {
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
  ret void
}

@bar.large = addrspace(3) global [4096 x i64] undef, align 4

; 4-byte-aligned i64 constant stores to elements 2048 and 4095 of @bar.large
; (byte offsets 0x4000 and 0x7ff8); each is expected as its own ds_write2_b32
; with a separately materialized base.
; SI-LABEL: @store_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
  ; Two align-4 i64 stores of 123 at elements 2048 and 4095 of @bar.large.
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
  ret void
}

; Float arrays in addrspace(3) written by @write2_sgemm_sequence.
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4

; Loads one float from %in and stores it ten times:
;   - to @sgemm.lA at indices x, x+1, x+16, x+17
;   - to @sgemm.lB at indices y, y+1, y+32, y+33, y+64, y+65
; No FileCheck directives are attached to this function; %C, %lda and %ldb are
; not used by the body.
; NOTE(review): %x.i is read from workgroup.id.x while %y.i is read from
; workitem.id.y - confirm that this mix is intentional.
define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
  %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
  %val = load float, float addrspace(1)* %in

  ; Stores into @sgemm.lA at x + {0, 1, 16, 17}.
  %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
  store float %val, float addrspace(3)* %arrayidx44, align 4
  %add47 = add nsw i32 %x.i, 1
  %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
  store float %val, float addrspace(3)* %arrayidx48, align 4
  %add51 = add nsw i32 %x.i, 16
  %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
  store float %val, float addrspace(3)* %arrayidx52, align 4
  %add55 = add nsw i32 %x.i, 17
  %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
  store float %val, float addrspace(3)* %arrayidx56, align 4

  ; Stores into @sgemm.lB at y + {0, 1, 32, 33, 64, 65}.
  %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
  store float %val, float addrspace(3)* %arrayidx60, align 4
  %add63 = add nsw i32 %y.i, 1
  %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
  store float %val, float addrspace(3)* %arrayidx64, align 4
  %add67 = add nsw i32 %y.i, 32
  %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
  store float %val, float addrspace(3)* %arrayidx68, align 4
  %add71 = add nsw i32 %y.i, 33
  %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
  store float %val, float addrspace(3)* %arrayidx72, align 4
  %add75 = add nsw i32 %y.i, 64
  %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
  store float %val, float addrspace(3)* %arrayidx76, align 4
  %add79 = add nsw i32 %y.i, 65
  %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
  store float %val, float addrspace(3)* %arrayidx80, align 4
  ret void
}

; Loads a <4 x float> from %in (align 4) and stores it, also with align 4, to
; element %x.i (workitem.id.x) of the addrspace(3) pointer %out.
; The directives below describe two ds_write2_b32 instructions, one with
; offset0:3 offset1:2 and one with offset0:1 only.
; NOTE(review): these directives use the CI prefix, but the RUN line at the top
; of the file enables only the SI prefix, so they look inactive - confirm
; before relying on them or switching the prefix.
; CI-LABEL: {{^}}simple_write2_v4f32_superreg_align4:
; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:2{{$}}
; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:1{{$}}
; CI: s_endpgm
define void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in
  %val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4
  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %out, i32 %x.i
  store <4 x float> %val0, <4 x float> addrspace(3)* %out.gep, align 4
  ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workgroup.id.x() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workgroup.id.y() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() #1

; Attribute groups: #0 is attached to the test function definitions that carry
; an attribute group, #1 to the intrinsic declarations and their call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { convergent nounwind }