; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s

; Regression test for combining narrow vector loads on the Evergreen (cypress)
; target. The IR below corresponds to this OpenCL kernel:
;
; kernel void combine_vloads(global char8* src, global char8* result) {
;   for (int i = 0; i < 1024; ++i)
;     result[i] = src[0] + src[1] + src[2] + src[3];
; }
;
; The four char8 reads of src[0..3] appear in the IR as one 32-byte
; (<8 x i32>) load, which the checks below expect to be emitted as two
; 128-bit vertex reads.


; 128-bit loads instead of many 8-bit
; EG-LABEL: {{^}}combine_vloads:
; EG: VTX_READ_128
; EG: VTX_READ_128
define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
entry:
  br label %for.body

for.exit:                                         ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  ; Loop counter i, running from 0 up to (but not including) 1024.
  %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]

  ; Reinterpret %src so that src[0..3] (4 x 8 bytes) is read with a single
  ; 32-byte load. The address does not depend on the loop counter.
  %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
  %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
  %vecload2 = load <8 x i32>, <8 x i32> addrspace(1)* %0, align 32
  %1 = bitcast <8 x i32> %vecload2 to <32 x i8>

  ; Slice the 32 loaded bytes back into four <8 x i8> pieces (bytes 0-7, 8-15,
  ; 16-23 and 24-31) and sum them element-wise.
  %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp9 = add nsw <8 x i8> %tmp5, %tmp8
  %tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %tmp13 = add nsw <8 x i8> %tmp9, %tmp12
  %tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %tmp17 = add nsw <8 x i8> %tmp13, %tmp16

  ; Store the 8-byte sum to result[i] as a single <2 x i32> write.
  %scevgep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %result, i32 %i.01
  %2 = bitcast <8 x i8> %tmp17 to <2 x i32>
  %3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)*
  store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8

  ; Advance the counter and leave the loop once it reaches 1024.
  %tmp19 = add nsw i32 %i.01, 1
  %exitcond = icmp eq i32 %tmp19, 1024
  br i1 %exitcond, label %for.exit, label %for.body
}