1*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X32 2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X64 3*9880d681SAndroid Build Coastguard Worker 4*9880d681SAndroid Build Coastguard Worker@g16 = external global i16 5*9880d681SAndroid Build Coastguard Worker 6*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind { 7*9880d681SAndroid Build Coastguard Worker; X32-LABEL: pinsrd_1: 8*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 9*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pinsrd $1, {{[0-9]+}}(%esp), %xmm0 10*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 11*9880d681SAndroid Build Coastguard Worker; 12*9880d681SAndroid Build Coastguard Worker; X64-LABEL: pinsrd_1: 13*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 14*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pinsrd $1, %edi, %xmm0 15*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 16*9880d681SAndroid Build Coastguard Worker %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1 17*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp1 18*9880d681SAndroid Build Coastguard Worker} 19*9880d681SAndroid Build Coastguard Worker 20*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind { 21*9880d681SAndroid Build Coastguard Worker; X32-LABEL: pinsrb_1: 22*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 23*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 24*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 25*9880d681SAndroid Build Coastguard Worker; 26*9880d681SAndroid Build Coastguard Worker; X64-LABEL: pinsrb_1: 27*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 28*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pinsrb $1, %edi, %xmm0 29*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 30*9880d681SAndroid Build Coastguard Worker %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1 31*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %tmp1 32*9880d681SAndroid Build Coastguard Worker} 33*9880d681SAndroid Build Coastguard Worker 34*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @pmovzxbq_1() nounwind { 35*9880d681SAndroid Build Coastguard Worker; X32-LABEL: pmovzxbq_1: 36*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 37*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl L_g16$non_lazy_ptr, %eax 38*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 39*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 40*9880d681SAndroid Build Coastguard Worker; 41*9880d681SAndroid Build Coastguard Worker; X64-LABEL: pmovzxbq_1: 42*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 43*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movq _g16@{{.*}}(%rip), %rax 44*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 45*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 46*9880d681SAndroid Build Coastguard Workerentry: 47*9880d681SAndroid Build Coastguard Worker %0 = load i16, i16* @g16, align 2 ; <i16> [#uses=1] 48*9880d681SAndroid Build Coastguard Worker %1 = insertelement <8 x i16> undef, i16 %0, i32 0 ; <<8 x i16>> [#uses=1] 49*9880d681SAndroid Build Coastguard Worker %2 = bitcast <8 x i16> %1 to <16 x i8> ; <<16 x i8>> [#uses=1] 50*9880d681SAndroid Build Coastguard Worker %3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone ; <<2 x i64>> [#uses=1] 51*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %3 52*9880d681SAndroid Build Coastguard Worker} 53*9880d681SAndroid Build Coastguard Worker 54*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone 55*9880d681SAndroid Build Coastguard Worker 56*9880d681SAndroid Build Coastguard Workerdefine i32 @extractps_1(<4 x float> %v) nounwind { 57*9880d681SAndroid Build Coastguard Worker; X32-LABEL: extractps_1: 58*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 59*9880d681SAndroid Build Coastguard Worker; X32-NEXT: extractps $3, %xmm0, %eax 60*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 61*9880d681SAndroid Build Coastguard Worker; 62*9880d681SAndroid Build Coastguard Worker; X64-LABEL: extractps_1: 63*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 64*9880d681SAndroid Build Coastguard Worker; X64-NEXT: extractps $3, %xmm0, %eax 65*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 66*9880d681SAndroid Build Coastguard Worker %s = extractelement <4 x float> %v, i32 3 67*9880d681SAndroid Build Coastguard Worker %i = bitcast float %s to i32 68*9880d681SAndroid Build Coastguard Worker ret i32 %i 69*9880d681SAndroid Build Coastguard Worker} 70*9880d681SAndroid Build Coastguard Workerdefine i32 @extractps_2(<4 x float> %v) nounwind { 71*9880d681SAndroid Build Coastguard Worker; X32-LABEL: extractps_2: 72*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 73*9880d681SAndroid Build Coastguard Worker; X32-NEXT: extractps $3, %xmm0, %eax 74*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 75*9880d681SAndroid Build Coastguard Worker; 76*9880d681SAndroid Build Coastguard Worker; X64-LABEL: extractps_2: 77*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 78*9880d681SAndroid Build Coastguard Worker; X64-NEXT: extractps $3, %xmm0, %eax 79*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 80*9880d681SAndroid Build Coastguard Worker %t = bitcast <4 x float> %v to <4 x i32> 81*9880d681SAndroid Build Coastguard Worker %s = extractelement <4 x i32> %t, i32 3 82*9880d681SAndroid Build Coastguard Worker ret i32 %s 83*9880d681SAndroid Build Coastguard Worker} 84*9880d681SAndroid Build Coastguard Worker 85*9880d681SAndroid Build Coastguard Worker 86*9880d681SAndroid Build Coastguard Worker; The non-store form of extractps puts its result into a GPR. 87*9880d681SAndroid Build Coastguard Worker; This makes it suitable for an extract from a <4 x float> that 88*9880d681SAndroid Build Coastguard Worker; is bitcasted to i32, but unsuitable for much of anything else. 89*9880d681SAndroid Build Coastguard Worker 90*9880d681SAndroid Build Coastguard Workerdefine float @ext_1(<4 x float> %v) nounwind { 91*9880d681SAndroid Build Coastguard Worker; X32-LABEL: ext_1: 92*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 93*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pushl %eax 94*9880d681SAndroid Build Coastguard Worker; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 95*9880d681SAndroid Build Coastguard Worker; X32-NEXT: addss LCPI5_0, %xmm0 96*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movss %xmm0, (%esp) 97*9880d681SAndroid Build Coastguard Worker; X32-NEXT: flds (%esp) 98*9880d681SAndroid Build Coastguard Worker; X32-NEXT: popl %eax 99*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 100*9880d681SAndroid Build Coastguard Worker; 101*9880d681SAndroid Build Coastguard Worker; X64-LABEL: ext_1: 102*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 103*9880d681SAndroid Build Coastguard Worker; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 104*9880d681SAndroid Build Coastguard Worker; X64-NEXT: addss {{.*}}(%rip), %xmm0 105*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 106*9880d681SAndroid Build Coastguard Worker %s = extractelement <4 x float> %v, i32 3 107*9880d681SAndroid Build Coastguard Worker %t = fadd float %s, 1.0 108*9880d681SAndroid Build Coastguard Worker ret float %t 109*9880d681SAndroid Build Coastguard Worker} 110*9880d681SAndroid Build Coastguard Workerdefine float @ext_2(<4 x float> %v) nounwind { 111*9880d681SAndroid Build Coastguard Worker; X32-LABEL: ext_2: 112*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 113*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pushl %eax 114*9880d681SAndroid Build Coastguard Worker; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 115*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movss %xmm0, (%esp) 116*9880d681SAndroid Build Coastguard Worker; X32-NEXT: flds (%esp) 117*9880d681SAndroid Build Coastguard Worker; X32-NEXT: popl %eax 118*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 119*9880d681SAndroid Build Coastguard Worker; 120*9880d681SAndroid Build Coastguard Worker; X64-LABEL: ext_2: 121*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 122*9880d681SAndroid Build Coastguard Worker; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] 123*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 124*9880d681SAndroid Build Coastguard Worker %s = extractelement <4 x float> %v, i32 3 125*9880d681SAndroid Build Coastguard Worker ret float %s 126*9880d681SAndroid Build Coastguard Worker} 127*9880d681SAndroid Build Coastguard Workerdefine i32 @ext_3(<4 x i32> %v) nounwind { 128*9880d681SAndroid Build Coastguard Worker; X32-LABEL: ext_3: 129*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 130*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pextrd $3, %xmm0, %eax 131*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 132*9880d681SAndroid Build Coastguard Worker; 133*9880d681SAndroid Build Coastguard Worker; X64-LABEL: ext_3: 134*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 135*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pextrd $3, %xmm0, %eax 136*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 137*9880d681SAndroid Build Coastguard Worker %i = extractelement <4 x i32> %v, i32 3 138*9880d681SAndroid Build Coastguard Worker ret i32 %i 139*9880d681SAndroid Build Coastguard Worker} 140*9880d681SAndroid Build Coastguard Worker 141*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind { 142*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_1: 143*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 144*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3] 145*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 146*9880d681SAndroid Build Coastguard Worker; 147*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_1: 148*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 149*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3] 150*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 151*9880d681SAndroid Build Coastguard Worker %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 21) nounwind readnone 152*9880d681SAndroid Build Coastguard Worker ret <4 x float> %tmp1 153*9880d681SAndroid Build Coastguard Worker} 154*9880d681SAndroid Build Coastguard Worker 155*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone 156*9880d681SAndroid Build Coastguard Worker 157*9880d681SAndroid Build Coastguard Worker; When optimizing for speed, prefer blendps over insertps even if it means we have to 158*9880d681SAndroid Build Coastguard Worker; generate a separate movss to load the scalar operand. 159*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @blendps_not_insertps_1(<4 x float> %t1, float %t2) nounwind { 160*9880d681SAndroid Build Coastguard Worker; X32-LABEL: blendps_not_insertps_1: 161*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 162*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 163*9880d681SAndroid Build Coastguard Worker; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 164*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 165*9880d681SAndroid Build Coastguard Worker; 166*9880d681SAndroid Build Coastguard Worker; X64-LABEL: blendps_not_insertps_1: 167*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 168*9880d681SAndroid Build Coastguard Worker; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 169*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 170*9880d681SAndroid Build Coastguard Worker %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0 171*9880d681SAndroid Build Coastguard Worker ret <4 x float> %tmp1 172*9880d681SAndroid Build Coastguard Worker} 173*9880d681SAndroid Build Coastguard Worker 174*9880d681SAndroid Build Coastguard Worker; When optimizing for size, generate an insertps if there's a load fold opportunity. 175*9880d681SAndroid Build Coastguard Worker; The difference between i386 and x86-64 ABIs for the float operand means we should 176*9880d681SAndroid Build Coastguard Worker; generate an insertps for X32 but not for X64! 177*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize nounwind { 178*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_or_blendps: 179*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 180*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3] 181*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 182*9880d681SAndroid Build Coastguard Worker; 183*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_or_blendps: 184*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 185*9880d681SAndroid Build Coastguard Worker; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 186*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 187*9880d681SAndroid Build Coastguard Worker %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0 188*9880d681SAndroid Build Coastguard Worker ret <4 x float> %tmp1 189*9880d681SAndroid Build Coastguard Worker} 190*9880d681SAndroid Build Coastguard Worker 191*9880d681SAndroid Build Coastguard Worker; An insert into the low 32-bits of a vector from the low 32-bits of another vector 192*9880d681SAndroid Build Coastguard Worker; is always just a blendps because blendps is never more expensive than insertps. 193*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @blendps_not_insertps_2(<4 x float> %t1, <4 x float> %t2) nounwind { 194*9880d681SAndroid Build Coastguard Worker; X32-LABEL: blendps_not_insertps_2: 195*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 196*9880d681SAndroid Build Coastguard Worker; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 197*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 198*9880d681SAndroid Build Coastguard Worker; 199*9880d681SAndroid Build Coastguard Worker; X64-LABEL: blendps_not_insertps_2: 200*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 201*9880d681SAndroid Build Coastguard Worker; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 202*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 203*9880d681SAndroid Build Coastguard Worker %tmp2 = extractelement <4 x float> %t2, i32 0 204*9880d681SAndroid Build Coastguard Worker %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0 205*9880d681SAndroid Build Coastguard Worker ret <4 x float> %tmp1 206*9880d681SAndroid Build Coastguard Worker} 207*9880d681SAndroid Build Coastguard Worker 208*9880d681SAndroid Build Coastguard Workerdefine i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind { 209*9880d681SAndroid Build Coastguard Worker; X32-LABEL: ptestz_1: 210*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 211*9880d681SAndroid Build Coastguard Worker; X32-NEXT: xorl %eax, %eax 212*9880d681SAndroid Build Coastguard Worker; X32-NEXT: ptest %xmm1, %xmm0 213*9880d681SAndroid Build Coastguard Worker; X32-NEXT: sete %al 214*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 215*9880d681SAndroid Build Coastguard Worker; 216*9880d681SAndroid Build Coastguard Worker; X64-LABEL: ptestz_1: 217*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 218*9880d681SAndroid Build Coastguard Worker; X64-NEXT: xorl %eax, %eax 219*9880d681SAndroid Build Coastguard Worker; X64-NEXT: ptest %xmm1, %xmm0 220*9880d681SAndroid Build Coastguard Worker; X64-NEXT: sete %al 221*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 222*9880d681SAndroid Build Coastguard Worker %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone 223*9880d681SAndroid Build Coastguard Worker ret i32 %tmp1 224*9880d681SAndroid Build Coastguard Worker} 225*9880d681SAndroid Build Coastguard Worker 226*9880d681SAndroid Build Coastguard Workerdefine i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind { 227*9880d681SAndroid Build Coastguard Worker; X32-LABEL: ptestz_2: 228*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 229*9880d681SAndroid Build Coastguard Worker; X32-NEXT: ptest %xmm1, %xmm0 230*9880d681SAndroid Build Coastguard Worker; X32-NEXT: sbbl %eax, %eax 231*9880d681SAndroid Build Coastguard Worker; X32-NEXT: andl $1, %eax 232*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 233*9880d681SAndroid Build Coastguard Worker; 234*9880d681SAndroid Build Coastguard Worker; X64-LABEL: ptestz_2: 235*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 236*9880d681SAndroid Build Coastguard Worker; X64-NEXT: ptest %xmm1, %xmm0 237*9880d681SAndroid Build Coastguard Worker; X64-NEXT: sbbl %eax, %eax 238*9880d681SAndroid Build Coastguard Worker; X64-NEXT: andl $1, %eax 239*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 240*9880d681SAndroid Build Coastguard Worker %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone 241*9880d681SAndroid Build Coastguard Worker ret i32 %tmp1 242*9880d681SAndroid Build Coastguard Worker} 243*9880d681SAndroid Build Coastguard Worker 244*9880d681SAndroid Build Coastguard Workerdefine i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind { 245*9880d681SAndroid Build Coastguard Worker; X32-LABEL: ptestz_3: 246*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 247*9880d681SAndroid Build Coastguard Worker; X32-NEXT: xorl %eax, %eax 248*9880d681SAndroid Build Coastguard Worker; X32-NEXT: ptest %xmm1, %xmm0 249*9880d681SAndroid Build Coastguard Worker; X32-NEXT: seta %al 250*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 251*9880d681SAndroid Build Coastguard Worker; 252*9880d681SAndroid Build Coastguard Worker; X64-LABEL: ptestz_3: 253*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 254*9880d681SAndroid Build Coastguard Worker; X64-NEXT: xorl %eax, %eax 255*9880d681SAndroid Build Coastguard Worker; X64-NEXT: ptest %xmm1, %xmm0 256*9880d681SAndroid Build Coastguard Worker; X64-NEXT: seta %al 257*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 258*9880d681SAndroid Build Coastguard Worker %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone 259*9880d681SAndroid Build Coastguard Worker ret i32 %tmp1 260*9880d681SAndroid Build Coastguard Worker} 261*9880d681SAndroid Build Coastguard Worker 262*9880d681SAndroid Build Coastguard Worker 263*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone 264*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone 265*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 266*9880d681SAndroid Build Coastguard Worker 267*9880d681SAndroid Build Coastguard Worker; This used to compile to insertps $0 + insertps $16. insertps $0 is always 268*9880d681SAndroid Build Coastguard Worker; pointless. 269*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind { 270*9880d681SAndroid Build Coastguard Worker; X32-LABEL: buildvector: 271*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 272*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 273*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] 274*9880d681SAndroid Build Coastguard Worker; X32-NEXT: addss %xmm1, %xmm0 275*9880d681SAndroid Build Coastguard Worker; X32-NEXT: addss %xmm2, %xmm3 276*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] 277*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 278*9880d681SAndroid Build Coastguard Worker; 279*9880d681SAndroid Build Coastguard Worker; X64-LABEL: buildvector: 280*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 281*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 282*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] 283*9880d681SAndroid Build Coastguard Worker; X64-NEXT: addss %xmm1, %xmm0 284*9880d681SAndroid Build Coastguard Worker; X64-NEXT: addss %xmm2, %xmm3 285*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] 286*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 287*9880d681SAndroid Build Coastguard Workerentry: 288*9880d681SAndroid Build Coastguard Worker %tmp7 = extractelement <2 x float> %A, i32 0 289*9880d681SAndroid Build Coastguard Worker %tmp5 = extractelement <2 x float> %A, i32 1 290*9880d681SAndroid Build Coastguard Worker %tmp3 = extractelement <2 x float> %B, i32 0 291*9880d681SAndroid Build Coastguard Worker %tmp1 = extractelement <2 x float> %B, i32 1 292*9880d681SAndroid Build Coastguard Worker %add.r = fadd float %tmp7, %tmp3 293*9880d681SAndroid Build Coastguard Worker %add.i = fadd float %tmp5, %tmp1 294*9880d681SAndroid Build Coastguard Worker %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 295*9880d681SAndroid Build Coastguard Worker %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 296*9880d681SAndroid Build Coastguard Worker ret <2 x float> %tmp9 297*9880d681SAndroid Build Coastguard Worker} 298*9880d681SAndroid Build Coastguard Worker 299*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_shufflevector_1(<4 x float> %a, <4 x float>* nocapture readonly %pb) { 300*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_shufflevector_1: 301*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 302*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 303*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 304*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 305*9880d681SAndroid Build Coastguard Worker; 306*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_shufflevector_1: 307*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 308*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 309*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 310*9880d681SAndroid Build Coastguard Workerentry: 311*9880d681SAndroid Build Coastguard Worker %0 = load <4 x float>, <4 x float>* %pb, align 16 312*9880d681SAndroid Build Coastguard Worker %vecinit6 = shufflevector <4 x float> %a, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 313*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit6 314*9880d681SAndroid Build Coastguard Worker} 315*9880d681SAndroid Build Coastguard Worker 316*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_shufflevector_2(<4 x float> %a, <4 x float> %b) { 317*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_shufflevector_2: 318*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 319*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3] 320*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 321*9880d681SAndroid Build Coastguard Worker; 322*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_shufflevector_2: 323*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 324*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3] 325*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 326*9880d681SAndroid Build Coastguard Workerentry: 327*9880d681SAndroid Build Coastguard Worker %vecinit6 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3> 328*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit6 329*9880d681SAndroid Build Coastguard Worker} 330*9880d681SAndroid Build Coastguard Worker 331*9880d681SAndroid Build Coastguard Worker; For loading an i32 from memory into an xmm register we use pinsrd 332*9880d681SAndroid Build Coastguard Worker; instead of insertps 333*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocapture readonly %pb) { 334*9880d681SAndroid Build Coastguard Worker; X32-LABEL: pinsrd_from_shufflevector_i32: 335*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 336*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 337*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = mem[0,1,2,0] 338*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 339*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 340*9880d681SAndroid Build Coastguard Worker; 341*9880d681SAndroid Build Coastguard Worker; X64-LABEL: pinsrd_from_shufflevector_i32: 342*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 343*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = mem[0,1,2,0] 344*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 345*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 346*9880d681SAndroid Build Coastguard Workerentry: 347*9880d681SAndroid Build Coastguard Worker %0 = load <4 x i32>, <4 x i32>* %pb, align 16 348*9880d681SAndroid Build Coastguard Worker %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 349*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit6 350*9880d681SAndroid Build Coastguard Worker} 351*9880d681SAndroid Build Coastguard Worker 352*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) { 353*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_shufflevector_i32_2: 354*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 355*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 356*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 357*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 358*9880d681SAndroid Build Coastguard Worker; 359*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_shufflevector_i32_2: 360*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 361*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 362*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 363*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 364*9880d681SAndroid Build Coastguard Workerentry: 365*9880d681SAndroid Build Coastguard Worker %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3> 366*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit6 367*9880d681SAndroid Build Coastguard Worker} 368*9880d681SAndroid Build Coastguard Worker 369*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_load_ins_elt_undef(<4 x float> %a, float* %b) { 370*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_load_ins_elt_undef: 371*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 372*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 373*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] 374*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 375*9880d681SAndroid Build Coastguard Worker; 376*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_load_ins_elt_undef: 377*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 378*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] 379*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 380*9880d681SAndroid Build Coastguard Worker %1 = load float, float* %b, align 4 381*9880d681SAndroid Build Coastguard Worker %2 = insertelement <4 x float> undef, float %1, i32 0 382*9880d681SAndroid Build Coastguard Worker %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 3> 383*9880d681SAndroid Build Coastguard Worker ret <4 x float> %result 384*9880d681SAndroid Build Coastguard Worker} 385*9880d681SAndroid Build Coastguard Worker 386*9880d681SAndroid Build Coastguard Worker; TODO: Like on pinsrd_from_shufflevector_i32, remove this mov instr 387*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) { 388*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_load_ins_elt_undef_i32: 389*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 390*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 391*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 392*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] 393*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 394*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 395*9880d681SAndroid Build Coastguard Worker; 396*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_load_ins_elt_undef_i32: 397*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 398*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 399*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] 400*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 401*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 402*9880d681SAndroid Build Coastguard Worker %1 = load i32, i32* %b, align 4 403*9880d681SAndroid Build Coastguard Worker %2 = insertelement <4 x i32> undef, i32 %1, i32 0 404*9880d681SAndroid Build Coastguard Worker %result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3> 405*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %result 406*9880d681SAndroid Build Coastguard Worker} 407*9880d681SAndroid Build Coastguard Worker 408*9880d681SAndroid Build Coastguard Worker;;;;;; Shuffles optimizable with a single insertps or blend instruction 409*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) { 410*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_XYZ0: 411*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 412*9880d681SAndroid Build Coastguard Worker; X32-NEXT: xorps %xmm1, %xmm1 413*9880d681SAndroid Build Coastguard Worker; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 414*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 415*9880d681SAndroid Build Coastguard Worker; 416*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_XYZ0: 417*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 418*9880d681SAndroid Build Coastguard Worker; X64-NEXT: xorps %xmm1, %xmm1 419*9880d681SAndroid Build Coastguard Worker; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 420*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 421*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 422*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 423*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %x, i32 1 424*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1 425*9880d681SAndroid Build Coastguard Worker %vecext3 = extractelement <4 x float> %x, i32 2 426*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2 427*9880d681SAndroid Build Coastguard Worker %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3 428*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit5 429*9880d681SAndroid Build Coastguard Worker} 430*9880d681SAndroid Build Coastguard Worker 431*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) { 432*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_XY00: 433*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 434*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 435*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 436*9880d681SAndroid Build Coastguard Worker; 437*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_XY00: 438*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 439*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 440*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 441*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 442*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 443*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %x, i32 1 444*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1 445*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2 446*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3 447*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 448*9880d681SAndroid Build Coastguard Worker} 449*9880d681SAndroid Build Coastguard Worker 450*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_XYY0(<4 x float> %x, <4 x float> %a) { 451*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_XYY0: 452*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 453*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero 454*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 455*9880d681SAndroid Build Coastguard Worker; 456*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_XYY0: 457*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 458*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero 459*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 460*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 461*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 462*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %x, i32 1 463*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1 464*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext1, i32 2 465*9880d681SAndroid Build Coastguard Worker %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3 466*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit5 467*9880d681SAndroid Build Coastguard Worker} 468*9880d681SAndroid Build Coastguard Worker 469*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_XYW0(<4 x float> %x, <4 x float> %a) { 470*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_XYW0: 471*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 472*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero 473*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 474*9880d681SAndroid Build Coastguard Worker; 475*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_XYW0: 476*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 477*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero 478*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 479*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 480*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 481*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %x, i32 1 482*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1 483*9880d681SAndroid Build Coastguard Worker %vecext2 = extractelement <4 x float> %x, i32 3 484*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit2, float %vecext2, i32 2 485*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3 486*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 487*9880d681SAndroid Build Coastguard Worker} 488*9880d681SAndroid Build Coastguard Worker 489*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) { 490*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_W00W: 491*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 492*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3] 493*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 494*9880d681SAndroid Build Coastguard Worker; 495*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_W00W: 496*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 497*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3] 498*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 499*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 3 500*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 501*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float 0.0, i32 1 502*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2 503*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit3, float %vecext, i32 3 504*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 505*9880d681SAndroid Build Coastguard Worker} 506*9880d681SAndroid Build Coastguard Worker 507*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) { 508*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_X00A: 509*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 510*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] 511*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 512*9880d681SAndroid Build Coastguard Worker; 513*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_X00A: 514*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 515*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] 516*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 517*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 518*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 519*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1 520*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2 521*9880d681SAndroid Build Coastguard Worker %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 522*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 523*9880d681SAndroid Build Coastguard Worker} 524*9880d681SAndroid Build Coastguard Worker 525*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) { 526*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_X00X: 527*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 528*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0] 529*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 530*9880d681SAndroid Build Coastguard Worker; 531*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_X00X: 532*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 533*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0] 534*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 535*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 536*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 537*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1 538*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2 539*9880d681SAndroid Build Coastguard Worker %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 540*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 541*9880d681SAndroid Build Coastguard Worker} 542*9880d681SAndroid Build Coastguard Worker 543*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) { 544*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_X0YC: 545*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 546*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 547*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2] 548*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 549*9880d681SAndroid Build Coastguard Worker; 550*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_X0YC: 551*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 552*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 553*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2] 554*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 555*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 556*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 557*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1 558*9880d681SAndroid Build Coastguard Worker %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef> 559*9880d681SAndroid Build Coastguard Worker %vecinit5 = shufflevector <4 x float> %vecinit3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6> 560*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit5 561*9880d681SAndroid Build Coastguard Worker} 562*9880d681SAndroid Build Coastguard Worker 563*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) { 564*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XYZ0: 565*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 566*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pxor %xmm1, %xmm1 567*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 568*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 569*9880d681SAndroid Build Coastguard Worker; 570*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XYZ0: 571*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 572*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pxor %xmm1, %xmm1 573*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 574*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 575*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 0 576*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 577*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x i32> %x, i32 1 578*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 579*9880d681SAndroid Build Coastguard Worker %vecext3 = extractelement <4 x i32> %x, i32 2 580*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 581*9880d681SAndroid Build Coastguard Worker %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3 582*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit5 583*9880d681SAndroid Build Coastguard Worker} 584*9880d681SAndroid Build Coastguard Worker 585*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) { 586*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XY00: 587*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 588*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 589*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 590*9880d681SAndroid Build Coastguard Worker; 591*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XY00: 592*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 593*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 594*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 595*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 0 596*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 597*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x i32> %x, i32 1 598*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 599*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2 600*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3 601*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit4 602*9880d681SAndroid Build Coastguard Worker} 603*9880d681SAndroid Build Coastguard Worker 604*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) { 605*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XYY0: 606*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 607*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3] 608*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pxor %xmm0, %xmm0 609*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] 610*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 611*9880d681SAndroid Build Coastguard Worker; 612*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XYY0: 613*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 614*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3] 615*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pxor %xmm0, %xmm0 616*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] 617*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 618*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 0 619*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 620*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x i32> %x, i32 1 621*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 622*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext1, i32 2 623*9880d681SAndroid Build Coastguard Worker %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3 624*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit5 625*9880d681SAndroid Build Coastguard Worker} 626*9880d681SAndroid Build Coastguard Worker 627*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) { 628*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XYW0: 629*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 630*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3] 631*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pxor %xmm0, %xmm0 632*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] 633*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 634*9880d681SAndroid Build Coastguard Worker; 635*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XYW0: 636*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 637*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3] 638*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pxor %xmm0, %xmm0 639*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] 640*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 641*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 0 642*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 643*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x i32> %x, i32 1 644*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 645*9880d681SAndroid Build Coastguard Worker %vecext2 = extractelement <4 x i32> %x, i32 3 646*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %vecext2, i32 2 647*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3 648*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit4 649*9880d681SAndroid Build Coastguard Worker} 650*9880d681SAndroid Build Coastguard Worker 651*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) { 652*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_W00W: 653*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 654*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] 655*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pxor %xmm0, %xmm0 656*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 657*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 658*9880d681SAndroid Build Coastguard Worker; 659*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_W00W: 660*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 661*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] 662*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pxor %xmm0, %xmm0 663*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 664*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 665*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 3 666*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 667*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x i32> %vecinit, i32 0, i32 1 668*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2 669*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x i32> %vecinit3, i32 %vecext, i32 3 670*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit4 671*9880d681SAndroid Build Coastguard Worker} 672*9880d681SAndroid Build Coastguard Worker 673*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) { 674*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_X00A: 675*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 676*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pxor %xmm2, %xmm2 677*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7] 678*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 679*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 680*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 681*9880d681SAndroid Build Coastguard Worker; 682*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_X00A: 683*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 684*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pxor %xmm2, %xmm2 685*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7] 686*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 687*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 688*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 689*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 0 690*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 691*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1 692*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2 693*9880d681SAndroid Build Coastguard Worker %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 694*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit4 695*9880d681SAndroid Build Coastguard Worker} 696*9880d681SAndroid Build Coastguard Worker 697*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) { 698*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_X00X: 699*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 700*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pxor %xmm1, %xmm1 701*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] 702*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] 703*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 704*9880d681SAndroid Build Coastguard Worker; 705*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_X00X: 706*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 707*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pxor %xmm1, %xmm1 708*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] 709*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] 710*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 711*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 0 712*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 713*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1 714*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2 715*9880d681SAndroid Build Coastguard Worker %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 716*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit4 717*9880d681SAndroid Build Coastguard Worker} 718*9880d681SAndroid Build Coastguard Worker 719*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) { 720*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_X0YC: 721*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 722*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero 723*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2] 724*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] 725*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 726*9880d681SAndroid Build Coastguard Worker; 727*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_X0YC: 728*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 729*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero 730*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2] 731*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] 732*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 733*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x i32> %x, i32 0 734*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 735*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1 736*9880d681SAndroid Build Coastguard Worker %vecinit3 = shufflevector <4 x i32> %vecinit1, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef> 737*9880d681SAndroid Build Coastguard Worker %vecinit5 = shufflevector <4 x i32> %vecinit3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6> 738*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vecinit5 739*9880d681SAndroid Build Coastguard Worker} 740*9880d681SAndroid Build Coastguard Worker 741*9880d681SAndroid Build Coastguard Worker;; Test for a bug in the first implementation of LowerBuildVectorv4x32 742*9880d681SAndroid Build Coastguard Workerdefine < 4 x float> @test_insertps_no_undef(<4 x float> %x) { 743*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_insertps_no_undef: 744*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 745*9880d681SAndroid Build Coastguard Worker; X32-NEXT: xorps %xmm1, %xmm1 746*9880d681SAndroid Build Coastguard Worker; X32-NEXT: blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3] 747*9880d681SAndroid Build Coastguard Worker; X32-NEXT: maxps %xmm1, %xmm0 748*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 749*9880d681SAndroid Build Coastguard Worker; 750*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_insertps_no_undef: 751*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 752*9880d681SAndroid Build Coastguard Worker; X64-NEXT: xorps %xmm1, %xmm1 753*9880d681SAndroid Build Coastguard Worker; X64-NEXT: blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3] 754*9880d681SAndroid Build Coastguard Worker; X64-NEXT: maxps %xmm1, %xmm0 755*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 756*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %x, i32 0 757*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 758*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %x, i32 1 759*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1 760*9880d681SAndroid Build Coastguard Worker %vecext3 = extractelement <4 x float> %x, i32 2 761*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2 762*9880d681SAndroid Build Coastguard Worker %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3 763*9880d681SAndroid Build Coastguard Worker %mask = fcmp olt <4 x float> %vecinit5, %x 764*9880d681SAndroid Build Coastguard Worker %res = select <4 x i1> %mask, <4 x float> %x, <4 x float>%vecinit5 765*9880d681SAndroid Build Coastguard Worker ret <4 x float> %res 766*9880d681SAndroid Build Coastguard Worker} 767*9880d681SAndroid Build Coastguard Worker 768*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) { 769*9880d681SAndroid Build Coastguard Worker; X32-LABEL: blendvb_fallback: 770*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 771*9880d681SAndroid Build Coastguard Worker; X32-NEXT: psllw $15, %xmm0 772*9880d681SAndroid Build Coastguard Worker; X32-NEXT: psraw $15, %xmm0 773*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendvb %xmm1, %xmm2 774*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movdqa %xmm2, %xmm0 775*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 776*9880d681SAndroid Build Coastguard Worker; 777*9880d681SAndroid Build Coastguard Worker; X64-LABEL: blendvb_fallback: 778*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 779*9880d681SAndroid Build Coastguard Worker; X64-NEXT: psllw $15, %xmm0 780*9880d681SAndroid Build Coastguard Worker; X64-NEXT: psraw $15, %xmm0 781*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendvb %xmm1, %xmm2 782*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movdqa %xmm2, %xmm0 783*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 784*9880d681SAndroid Build Coastguard Worker %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y 785*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %ret 786*9880d681SAndroid Build Coastguard Worker} 787*9880d681SAndroid Build Coastguard Worker 788*9880d681SAndroid Build Coastguard Worker; On X32, account for the argument's move to registers 789*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) { 790*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_vector_load: 791*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 792*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 793*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps $48, (%{{...}}), {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 794*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 795*9880d681SAndroid Build Coastguard Worker; 796*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_vector_load: 797*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 798*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps $48, (%{{...}}), {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 799*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 800*9880d681SAndroid Build Coastguard Worker %1 = load <4 x float>, <4 x float>* %pb, align 16 801*9880d681SAndroid Build Coastguard Worker %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48) 802*9880d681SAndroid Build Coastguard Worker ret <4 x float> %2 803*9880d681SAndroid Build Coastguard Worker} 804*9880d681SAndroid Build Coastguard Worker 805*9880d681SAndroid Build Coastguard Worker;; Use a non-zero CountS for insertps 806*9880d681SAndroid Build Coastguard Worker;; Try to match a bit more of the instr, since we need the load's offset. 807*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) { 808*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_vector_load_offset: 809*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 810*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 811*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps $32, 4(%{{...}}), {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 812*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 813*9880d681SAndroid Build Coastguard Worker; 814*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_vector_load_offset: 815*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 816*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps $32, 4(%{{...}}), {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 817*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 818*9880d681SAndroid Build Coastguard Worker %1 = load <4 x float>, <4 x float>* %pb, align 16 819*9880d681SAndroid Build Coastguard Worker %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96) 820*9880d681SAndroid Build Coastguard Worker ret <4 x float> %2 821*9880d681SAndroid Build Coastguard Worker} 822*9880d681SAndroid Build Coastguard Worker 823*9880d681SAndroid Build Coastguard Worker;; Try to match a bit more of the instr, since we need the load's offset. 824*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) { 825*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_vector_load_offset_2: 826*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 827*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 828*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 829*9880d681SAndroid Build Coastguard Worker; X32-NEXT: shll $4, %ecx 830*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps $0, 12(%{{...}},%{{...}}), {{.*#+}} xmm0 = mem[0],xmm0[1,2,3] 831*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 832*9880d681SAndroid Build Coastguard Worker; 833*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_vector_load_offset_2: 834*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 835*9880d681SAndroid Build Coastguard Worker; X64-NEXT: shlq $4, %rsi 836*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps $0, 12(%{{...}},%{{...}}), {{.*#+}} xmm0 = mem[0],xmm0[1,2,3] 837*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 838*9880d681SAndroid Build Coastguard Worker %1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index 839*9880d681SAndroid Build Coastguard Worker %2 = load <4 x float>, <4 x float>* %1, align 16 840*9880d681SAndroid Build Coastguard Worker %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192) 841*9880d681SAndroid Build Coastguard Worker ret <4 x float> %3 842*9880d681SAndroid Build Coastguard Worker} 843*9880d681SAndroid Build Coastguard Worker 844*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocapture readonly %fb, i64 %index) { 845*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_broadcast_loadf32: 846*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 847*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 848*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 849*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 850*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 851*9880d681SAndroid Build Coastguard Worker; 852*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_broadcast_loadf32: 853*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 854*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 855*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 856*9880d681SAndroid Build Coastguard Worker %1 = getelementptr inbounds float, float* %fb, i64 %index 857*9880d681SAndroid Build Coastguard Worker %2 = load float, float* %1, align 4 858*9880d681SAndroid Build Coastguard Worker %3 = insertelement <4 x float> undef, float %2, i32 0 859*9880d681SAndroid Build Coastguard Worker %4 = insertelement <4 x float> %3, float %2, i32 1 860*9880d681SAndroid Build Coastguard Worker %5 = insertelement <4 x float> %4, float %2, i32 2 861*9880d681SAndroid Build Coastguard Worker %6 = insertelement <4 x float> %5, float %2, i32 3 862*9880d681SAndroid Build Coastguard Worker %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48) 863*9880d681SAndroid Build Coastguard Worker ret <4 x float> %7 864*9880d681SAndroid Build Coastguard Worker} 865*9880d681SAndroid Build Coastguard Worker 866*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float>* nocapture readonly %b) { 867*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_broadcast_loadv4f32: 868*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 869*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 870*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 871*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 872*9880d681SAndroid Build Coastguard Worker; 873*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_broadcast_loadv4f32: 874*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 875*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 876*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 877*9880d681SAndroid Build Coastguard Worker %1 = load <4 x float>, <4 x float>* %b, align 4 878*9880d681SAndroid Build Coastguard Worker %2 = extractelement <4 x float> %1, i32 0 879*9880d681SAndroid Build Coastguard Worker %3 = insertelement <4 x float> undef, float %2, i32 0 880*9880d681SAndroid Build Coastguard Worker %4 = insertelement <4 x float> %3, float %2, i32 1 881*9880d681SAndroid Build Coastguard Worker %5 = insertelement <4 x float> %4, float %2, i32 2 882*9880d681SAndroid Build Coastguard Worker %6 = insertelement <4 x float> %5, float %2, i32 3 883*9880d681SAndroid Build Coastguard Worker %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48) 884*9880d681SAndroid Build Coastguard Worker ret <4 x float> %7 885*9880d681SAndroid Build Coastguard Worker} 886*9880d681SAndroid Build Coastguard Worker 887*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) { 888*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_broadcast_multiple_use: 889*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 890*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 891*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 892*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero 893*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0] 894*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0] 895*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0] 896*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0] 897*9880d681SAndroid Build Coastguard Worker; X32-NEXT: addps %xmm1, %xmm0 898*9880d681SAndroid Build Coastguard Worker; X32-NEXT: addps %xmm2, %xmm3 899*9880d681SAndroid Build Coastguard Worker; X32-NEXT: addps %xmm3, %xmm0 900*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 901*9880d681SAndroid Build Coastguard Worker; 902*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_broadcast_multiple_use: 903*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 904*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero 905*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0] 906*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0] 907*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0] 908*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0] 909*9880d681SAndroid Build Coastguard Worker; X64-NEXT: addps %xmm1, %xmm0 910*9880d681SAndroid Build Coastguard Worker; X64-NEXT: addps %xmm2, %xmm3 911*9880d681SAndroid Build Coastguard Worker; X64-NEXT: addps %xmm3, %xmm0 912*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 913*9880d681SAndroid Build Coastguard Worker %1 = getelementptr inbounds float, float* %fb, i64 %index 914*9880d681SAndroid Build Coastguard Worker %2 = load float, float* %1, align 4 915*9880d681SAndroid Build Coastguard Worker %3 = insertelement <4 x float> undef, float %2, i32 0 916*9880d681SAndroid Build Coastguard Worker %4 = insertelement <4 x float> %3, float %2, i32 1 917*9880d681SAndroid Build Coastguard Worker %5 = insertelement <4 x float> %4, float %2, i32 2 918*9880d681SAndroid Build Coastguard Worker %6 = insertelement <4 x float> %5, float %2, i32 3 919*9880d681SAndroid Build Coastguard Worker %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48) 920*9880d681SAndroid Build Coastguard Worker %8 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %b, <4 x float> %6, i32 48) 921*9880d681SAndroid Build Coastguard Worker %9 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %c, <4 x float> %6, i32 48) 922*9880d681SAndroid Build Coastguard Worker %10 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %d, <4 x float> %6, i32 48) 923*9880d681SAndroid Build Coastguard Worker %11 = fadd <4 x float> %7, %8 924*9880d681SAndroid Build Coastguard Worker %12 = fadd <4 x float> %9, %10 925*9880d681SAndroid Build Coastguard Worker %13 = fadd <4 x float> %11, %12 926*9880d681SAndroid Build Coastguard Worker ret <4 x float> %13 927*9880d681SAndroid Build Coastguard Worker} 928*9880d681SAndroid Build Coastguard Worker 929*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) { 930*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_with_undefs: 931*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 932*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 933*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 934*9880d681SAndroid Build Coastguard Worker; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 935*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movapd %xmm1, %xmm0 936*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 937*9880d681SAndroid Build Coastguard Worker; 938*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_with_undefs: 939*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 940*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 941*9880d681SAndroid Build Coastguard Worker; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 942*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movapd %xmm1, %xmm0 943*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 944*9880d681SAndroid Build Coastguard Worker %1 = load float, float* %b, align 4 945*9880d681SAndroid Build Coastguard Worker %2 = insertelement <4 x float> undef, float %1, i32 0 946*9880d681SAndroid Build Coastguard Worker %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7> 947*9880d681SAndroid Build Coastguard Worker ret <4 x float> %result 948*9880d681SAndroid Build Coastguard Worker} 949*9880d681SAndroid Build Coastguard Worker 950*9880d681SAndroid Build Coastguard Worker; Test for a bug in X86ISelLowering.cpp:getINSERTPS where we were using 951*9880d681SAndroid Build Coastguard Worker; the destination index to change the load, instead of the source index. 952*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) { 953*9880d681SAndroid Build Coastguard Worker; X32-LABEL: pr20087: 954*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 955*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 956*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[0] 957*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 958*9880d681SAndroid Build Coastguard Worker; 959*9880d681SAndroid Build Coastguard Worker; X64-LABEL: pr20087: 960*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 961*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[0] 962*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 963*9880d681SAndroid Build Coastguard Worker %load = load <4 x float> , <4 x float> *%ptr 964*9880d681SAndroid Build Coastguard Worker %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2> 965*9880d681SAndroid Build Coastguard Worker ret <4 x float> %ret 966*9880d681SAndroid Build Coastguard Worker} 967*9880d681SAndroid Build Coastguard Worker 968*9880d681SAndroid Build Coastguard Worker; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1> 969*9880d681SAndroid Build Coastguard Workerdefine void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 { 970*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_pr20411: 971*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 972*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 973*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 974*9880d681SAndroid Build Coastguard Worker; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 975*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movdqu %xmm1, (%eax) 976*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 977*9880d681SAndroid Build Coastguard Worker; 978*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_pr20411: 979*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 980*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 981*9880d681SAndroid Build Coastguard Worker; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 982*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movdqu %xmm1, (%rdi) 983*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 984*9880d681SAndroid Build Coastguard Worker %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef> 985*9880d681SAndroid Build Coastguard Worker %ptrcast = bitcast i32* %RET to <4 x i32>* 986*9880d681SAndroid Build Coastguard Worker store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4 987*9880d681SAndroid Build Coastguard Worker ret void 988*9880d681SAndroid Build Coastguard Worker} 989*9880d681SAndroid Build Coastguard Worker 990*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_4(<4 x float> %A, <4 x float> %B) { 991*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_4: 992*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 993*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero 994*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 995*9880d681SAndroid Build Coastguard Worker; 996*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_4: 997*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 998*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero 999*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1000*9880d681SAndroid Build Coastguard Workerentry: 1001*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 0 1002*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 1003*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1 1004*9880d681SAndroid Build Coastguard Worker %vecext2 = extractelement <4 x float> %B, i32 2 1005*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2 1006*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3 1007*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 1008*9880d681SAndroid Build Coastguard Worker} 1009*9880d681SAndroid Build Coastguard Worker 1010*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_5(<4 x float> %A, <4 x float> %B) { 1011*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_5: 1012*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 1013*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero 1014*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1015*9880d681SAndroid Build Coastguard Worker; 1016*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_5: 1017*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 1018*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero 1019*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1020*9880d681SAndroid Build Coastguard Workerentry: 1021*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 0 1022*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 1023*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %B, i32 1 1024*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1 1025*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2 1026*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3 1027*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 1028*9880d681SAndroid Build Coastguard Worker} 1029*9880d681SAndroid Build Coastguard Worker 1030*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_6(<4 x float> %A, <4 x float> %B) { 1031*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_6: 1032*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 1033*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero 1034*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1035*9880d681SAndroid Build Coastguard Worker; 1036*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_6: 1037*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 1038*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero 1039*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1040*9880d681SAndroid Build Coastguard Workerentry: 1041*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 1 1042*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1 1043*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %B, i32 2 1044*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2 1045*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3 1046*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit3 1047*9880d681SAndroid Build Coastguard Worker} 1048*9880d681SAndroid Build Coastguard Worker 1049*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_7(<4 x float> %A, <4 x float> %B) { 1050*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_7: 1051*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 1052*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero 1053*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1054*9880d681SAndroid Build Coastguard Worker; 1055*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_7: 1056*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 1057*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero 1058*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1059*9880d681SAndroid Build Coastguard Workerentry: 1060*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 0 1061*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 1062*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1 1063*9880d681SAndroid Build Coastguard Worker %vecext2 = extractelement <4 x float> %B, i32 1 1064*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2 1065*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3 1066*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 1067*9880d681SAndroid Build Coastguard Worker} 1068*9880d681SAndroid Build Coastguard Worker 1069*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_8(<4 x float> %A, <4 x float> %B) { 1070*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_8: 1071*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 1072*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1073*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1074*9880d681SAndroid Build Coastguard Worker; 1075*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_8: 1076*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 1077*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1078*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1079*9880d681SAndroid Build Coastguard Workerentry: 1080*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 0 1081*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 1082*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %B, i32 0 1083*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1 1084*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2 1085*9880d681SAndroid Build Coastguard Worker %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3 1086*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit4 1087*9880d681SAndroid Build Coastguard Worker} 1088*9880d681SAndroid Build Coastguard Worker 1089*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_9(<4 x float> %A, <4 x float> %B) { 1090*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_9: 1091*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 1092*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero 1093*9880d681SAndroid Build Coastguard Worker; X32-NEXT: movaps %xmm1, %xmm0 1094*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1095*9880d681SAndroid Build Coastguard Worker; 1096*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_9: 1097*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 1098*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero 1099*9880d681SAndroid Build Coastguard Worker; X64-NEXT: movaps %xmm1, %xmm0 1100*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1101*9880d681SAndroid Build Coastguard Workerentry: 1102*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 0 1103*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1 1104*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x float> %B, i32 2 1105*9880d681SAndroid Build Coastguard Worker %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2 1106*9880d681SAndroid Build Coastguard Worker %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3 1107*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit3 1108*9880d681SAndroid Build Coastguard Worker} 1109*9880d681SAndroid Build Coastguard Worker 1110*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_10(<4 x float> %A) 1111*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_10: 1112*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: 1113*9880d681SAndroid Build Coastguard Worker; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero 1114*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1115*9880d681SAndroid Build Coastguard Worker; 1116*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_10: 1117*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: 1118*9880d681SAndroid Build Coastguard Worker; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero 1119*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1120*9880d681SAndroid Build Coastguard Worker{ 1121*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 0 1122*9880d681SAndroid Build Coastguard Worker %vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0 1123*9880d681SAndroid Build Coastguard Worker %vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2 1124*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecbuild2 1125*9880d681SAndroid Build Coastguard Worker} 1126*9880d681SAndroid Build Coastguard Worker 1127*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) { 1128*9880d681SAndroid Build Coastguard Worker; X32-LABEL: build_vector_to_shuffle_1: 1129*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 1130*9880d681SAndroid Build Coastguard Worker; X32-NEXT: xorps %xmm1, %xmm1 1131*9880d681SAndroid Build Coastguard Worker; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] 1132*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1133*9880d681SAndroid Build Coastguard Worker; 1134*9880d681SAndroid Build Coastguard Worker; X64-LABEL: build_vector_to_shuffle_1: 1135*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 1136*9880d681SAndroid Build Coastguard Worker; X64-NEXT: xorps %xmm1, %xmm1 1137*9880d681SAndroid Build Coastguard Worker; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] 1138*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1139*9880d681SAndroid Build Coastguard Workerentry: 1140*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 1 1141*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1 1142*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2 1143*9880d681SAndroid Build Coastguard Worker %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 1144*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit3 1145*9880d681SAndroid Build Coastguard Worker} 1146*9880d681SAndroid Build Coastguard Worker 1147*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) { 1148*9880d681SAndroid Build Coastguard Worker; X32-LABEL: build_vector_to_shuffle_2: 1149*9880d681SAndroid Build Coastguard Worker; X32: ## BB#0: ## %entry 1150*9880d681SAndroid Build Coastguard Worker; X32-NEXT: xorps %xmm1, %xmm1 1151*9880d681SAndroid Build Coastguard Worker; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1152*9880d681SAndroid Build Coastguard Worker; X32-NEXT: retl 1153*9880d681SAndroid Build Coastguard Worker; 1154*9880d681SAndroid Build Coastguard Worker; X64-LABEL: build_vector_to_shuffle_2: 1155*9880d681SAndroid Build Coastguard Worker; X64: ## BB#0: ## %entry 1156*9880d681SAndroid Build Coastguard Worker; X64-NEXT: xorps %xmm1, %xmm1 1157*9880d681SAndroid Build Coastguard Worker; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1158*9880d681SAndroid Build Coastguard Worker; X64-NEXT: retq 1159*9880d681SAndroid Build Coastguard Workerentry: 1160*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x float> %A, i32 1 1161*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1 1162*9880d681SAndroid Build Coastguard Worker %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2 1163*9880d681SAndroid Build Coastguard Worker ret <4 x float> %vecinit1 1164*9880d681SAndroid Build Coastguard Worker} 1165