; RUN: llc < %s -mattr=+neon | FileCheck %s

; Codegen test for NEON lane extraction (extractelement) and lane insertion
; (insertelement) on a thumbv7 target. Each function is anchored with a
; FileCheck label directive so that the instruction pattern that follows can
; only be satisfied by the body of that particular function.

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
target triple = "thumbv7-elf"

; ---------------------------------------------------------------------------
; Lane extraction from 64-bit vectors. A sign extension of the extracted
; element is expected to select the signed form of vmov, a zero extension the
; unsigned form.
; ---------------------------------------------------------------------------

define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vget_lanes8:
;CHECK: vmov.s8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = extractelement <8 x i8> %tmp1, i32 1
  %tmp3 = sext i8 %tmp2 to i32
  ret i32 %tmp3
}

define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vget_lanes16:
;CHECK: vmov.s16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = extractelement <4 x i16> %tmp1, i32 1
  %tmp3 = sext i16 %tmp2 to i32
  ret i32 %tmp3
}

define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vget_laneu8:
;CHECK: vmov.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = extractelement <8 x i8> %tmp1, i32 1
  %tmp3 = zext i8 %tmp2 to i32
  ret i32 %tmp3
}

define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vget_laneu16:
;CHECK: vmov.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = extractelement <4 x i16> %tmp1, i32 1
  %tmp3 = zext i16 %tmp2 to i32
  ret i32 %tmp3
}

; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vget_lanei32:
;CHECK: vmov.32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = add <2 x i32> %tmp1, %tmp1
  %tmp3 = extractelement <2 x i32> %tmp2, i32 1
  ret i32 %tmp3
}

; ---------------------------------------------------------------------------
; Lane extraction from 128-bit vectors; same expectations as above.
; ---------------------------------------------------------------------------

define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vgetQ_lanes8:
;CHECK: vmov.s8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = extractelement <16 x i8> %tmp1, i32 1
  %tmp3 = sext i8 %tmp2 to i32
  ret i32 %tmp3
}

define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vgetQ_lanes16:
;CHECK: vmov.s16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = extractelement <8 x i16> %tmp1, i32 1
  %tmp3 = sext i16 %tmp2 to i32
  ret i32 %tmp3
}

define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vgetQ_laneu8:
;CHECK: vmov.u8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = extractelement <16 x i8> %tmp1, i32 1
  %tmp3 = zext i8 %tmp2 to i32
  ret i32 %tmp3
}

define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vgetQ_laneu16:
;CHECK: vmov.u16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = extractelement <8 x i16> %tmp1, i32 1
  %tmp3 = zext i16 %tmp2 to i32
  ret i32 %tmp3
}

; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vgetQ_lanei32:
;CHECK: vmov.32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = add <4 x i32> %tmp1, %tmp1
  %tmp3 = extractelement <4 x i32> %tmp2, i32 1
  ret i32 %tmp3
}

; ---------------------------------------------------------------------------
; Extraction with no explicit extension: the extracted element is added to
; itself and stored at its own width. The unsigned form of vmov is expected.
; Each function carries its own label so the pattern cannot be satisfied by
; the output of a neighbouring function.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
; CHECK-LABEL: test_vget_laneu16:
entry:
; CHECK: vmov.u16 r0, d{{.*}}[1]
  %arg0_uint16x4_t = alloca <4 x i16>                   ; <<4 x i16>*> [#uses=1]
  %out_uint16_t = alloca i16                            ; <i16*> [#uses=1]
  %"alloca point" = bitcast i32 0 to i32                ; <i32> [#uses=0]
  %0 = load <4 x i16>, <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
  %1 = extractelement <4 x i16> %0, i32 1               ; <i16> [#uses=1]
  %2 = add i16 %1, %1
  store i16 %2, i16* %out_uint16_t, align 2
  br label %return

return:                                                 ; preds = %entry
  ret void
}

define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
; CHECK-LABEL: test_vget_laneu8:
entry:
; CHECK: vmov.u8 r0, d{{.*}}[1]
  %arg0_uint8x8_t = alloca <8 x i8>                     ; <<8 x i8>*> [#uses=1]
  %out_uint8_t = alloca i8                              ; <i8*> [#uses=1]
  %"alloca point" = bitcast i32 0 to i32                ; <i32> [#uses=0]
  %0 = load <8 x i8>, <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
  %1 = extractelement <8 x i8> %0, i32 1                ; <i8> [#uses=1]
  %2 = add i8 %1, %1
  store i8 %2, i8* %out_uint8_t, align 1
  br label %return

return:                                                 ; preds = %entry
  ret void
}

define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
; CHECK-LABEL: test_vgetQ_laneu16:
entry:
; CHECK: vmov.u16 r0, d{{.*}}[1]
  %arg0_uint16x8_t = alloca <8 x i16>                   ; <<8 x i16>*> [#uses=1]
  %out_uint16_t = alloca i16                            ; <i16*> [#uses=1]
  %"alloca point" = bitcast i32 0 to i32                ; <i32> [#uses=0]
  %0 = load <8 x i16>, <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
  %1 = extractelement <8 x i16> %0, i32 1               ; <i16> [#uses=1]
  %2 = add i16 %1, %1
  store i16 %2, i16* %out_uint16_t, align 2
  br label %return

return:                                                 ; preds = %entry
  ret void
}

define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
; CHECK-LABEL: test_vgetQ_laneu8:
entry:
; CHECK: vmov.u8 r0, d{{.*}}[1]
  %arg0_uint8x16_t = alloca <16 x i8>                   ; <<16 x i8>*> [#uses=1]
  %out_uint8_t = alloca i8                              ; <i8*> [#uses=1]
  %"alloca point" = bitcast i32 0 to i32                ; <i32> [#uses=0]
  %0 = load <16 x i8>, <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
  %1 = extractelement <16 x i8> %0, i32 1               ; <i8> [#uses=1]
  %2 = add i8 %1, %1
  store i8 %2, i8* %out_uint8_t, align 1
  br label %return

return:                                                 ; preds = %entry
  ret void
}

; ---------------------------------------------------------------------------
; Lane insertion into 64-bit and 128-bit integer vectors.
; ---------------------------------------------------------------------------

define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
;CHECK-LABEL: vset_lane8:
;CHECK: vmov.8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
  ret <8 x i8> %tmp2
}

define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
;CHECK-LABEL: vset_lane16:
;CHECK: vmov.16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
  ret <4 x i16> %tmp2
}

define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
;CHECK-LABEL: vset_lane32:
;CHECK: vmov.32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
  ret <2 x i32> %tmp2
}

define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
;CHECK-LABEL: vsetQ_lane8:
;CHECK: vmov.8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
  ret <16 x i8> %tmp2
}

define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
;CHECK-LABEL: vsetQ_lane16:
;CHECK: vmov.16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
  ret <8 x i16> %tmp2
}

define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
;CHECK-LABEL: vsetQ_lane32:
;CHECK: vmov.32 d{{.*}}[1], r1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
  ret <4 x i32> %tmp2
}

; Float lane insertion with the arm_aapcs_vfpcc calling convention: the
; expected output is a register-to-register vmov.f32 followed by a vmov.f64.
define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
;CHECK-LABEL: test_vset_lanef32:
;CHECK: vmov.f32 s3, s0
;CHECK: vmov.f64 d0, d1
entry:
  %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
  ret <2 x float> %0
}

; The llvm extractelement instruction does not require that the lane number
; be an immediate constant.  Make sure a variable lane number is handled.
; No particular instruction sequence is required here; the label directives
; only verify that each function was emitted.

define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind {
;CHECK-LABEL: vget_variable_lanes8:
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = extractelement <8 x i8> %tmp1, i32 %B
  %tmp3 = sext i8 %tmp2 to i32
  ret i32 %tmp3
}

define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind {
;CHECK-LABEL: vgetQ_variable_lanei32:
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = add <4 x i32> %tmp1, %tmp1
  %tmp3 = extractelement <4 x i32> %tmp2, i32 %B
  ret i32 %tmp3
}