; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; These are tests for SSE3 codegen.

; RUN: llc < %s -mtriple=x86_64-apple-darwin9 --mattr=+sse3 | FileCheck %s --check-prefix=X64

; Test for v8xi16 lowering where we extract the first element of the vector and
; place it in the second element of the result.
; Result element 0 comes from the constant operand (mask index 8), result
; element 1 is element 0 of the vector loaded from %old; the rest are undef.

define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
; X64-LABEL: t0:
; X64: ## BB#0: ## %entry
; X64-NEXT: movl $1, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
entry:
  %tmp3 = load <8 x i16>, <8 x i16>* %old
  %tmp6 = shufflevector <8 x i16> %tmp3,
                <8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
                <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
  store <8 x i16> %tmp6, <8 x i16>* %dest
  ret void
}

; Element 0 is taken from the vector loaded from %B (mask index 8); elements
; 1-7 are taken unchanged from the vector loaded from %A.
define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; X64-LABEL: t1:
; X64: ## BB#0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,65535]
; X64-NEXT: movaps %xmm0, %xmm1
; X64-NEXT: andnps (%rsi), %xmm1
; X64-NEXT: andps (%rdi), %xmm0
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
  ret <8 x i16> %tmp3

}

; Elements 0 and 3 are both element 1 of %B (mask index 9); every other
; element is taken unchanged from %A.
define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t2:
; X64: ## BB#0:
; X64-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535]
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,2,1,4,5,6,7]
; X64-NEXT: pandn %xmm1, %xmm2
; X64-NEXT: por %xmm2, %xmm0
; X64-NEXT: retq
  %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
  ret <8 x i16> %tmp
}

; Both shuffle operands are %A (%B is unused), so mask indices 8 and 13 select
; elements 0 and 5 of %A respectively.
define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t3:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,5]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X64-NEXT: retq
  %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
  ret <8 x i16> %tmp
}

; All mask indices are below 8, so only elements of %A are selected; elements
; move between the low and high halves of the vector.
define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t4:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7]
; X64-NEXT: retq
  %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
  ret <8 x i16> %tmp
}

; Interleaves adjacent pairs of i16 elements from the low halves of %B and %A:
; B[0,1], A[0,1], B[2,3], A[2,3].
define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t5:
; X64: ## BB#0:
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
  %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
  ret <8 x i16> %tmp
}

; Elements 0 and 1 come from %B (mask indices 8 and 9); elements 2-7 are taken
; unchanged from %A.
define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t6:
; X64: ## BB#0:
; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
  %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
  ret <8 x i16> %tmp
}

; Single-input permutation of %A: result elements 0-3 are drawn only from
; A[0..3] and result elements 4-7 only from A[4..7].
define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t7:
; X64: ## BB#0:
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; X64-NEXT: retq
  %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
  ret <8 x i16> %tmp
}

; Permutation written as an extractelement/insertelement chain: the vector
; loaded from %A is viewed as <8 x i16>, elements 0 and 2 are swapped and
; elements 4 and 6 are swapped, and the result is stored to %res.
define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
; X64-LABEL: t8:
; X64: ## BB#0:
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[2,1,0,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
  %tmp = load <2 x i64>, <2 x i64>* %A
  %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
  %tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
  %tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
  %tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2
  %tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3
  %tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4
  %tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5
  %tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6
  %tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7
  %tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0
  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1
  %tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7
  %tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
  store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
  ret void
}

; Keeps floats 0 and 1 of the vector loaded from %r and replaces floats 2 and 3
; with the 64 bits loaded from %A (read as a double and reinterpreted as two
; floats); the result is stored back to %r.
define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
; X64-LABEL: t9:
; X64: ## BB#0:
; X64-NEXT: movapd (%rdi), %xmm0
; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq
  %tmp = load <4 x float>, <4 x float>* %r
  %tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
  %tmp.upgrd.4 = load double, double* %tmp.upgrd.3
  %tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
  %tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
  %tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
  %tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0
  %tmp7 = extractelement <4 x float> %tmp, i32 1
  %tmp8 = extractelement <4 x float> %tmp6, i32 0
  %tmp9 = extractelement <4 x float> %tmp6, i32 1
  %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0
  %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
  %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
  %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
  store <4 x float> %tmp13, <4 x float>* %r
  ret void
}



; FIXME: This testcase produces icky code. It can be made much better!
; PR2585

@g1 = external constant <4 x i32>
@g2 = external constant <4 x i16>

; Loads @g1, views it as <8 x i16>, gathers the even-indexed elements
; (0, 2, 4, 6) into the low four lanes, and stores the low 64 bits to @g2 as
; <4 x i16>. The instructions are unnamed, so they are numbered %1 through %6.
define void @t10() nounwind {
; X64-LABEL: t10:
; X64: ## BB#0:
; X64-NEXT: movq _g1@{{.*}}(%rip), %rax
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT: movq _g2@{{.*}}(%rip), %rax
; X64-NEXT: movq %xmm0, (%rax)
; X64-NEXT: retq
  load <4 x i32>, <4 x i32>* @g1, align 16
  bitcast <4 x i32> %1 to <8 x i16>
  shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
  bitcast <8 x i16> %3 to <2 x i64>
  extractelement <2 x i64> %4, i32 0
  bitcast i64 %5 to <4 x i16>
  store <4 x i16> %6, <4 x i16>* @g2, align 8
  ret void
}

; Pack various elements via shuffles.
; t11: result element 0 is T0[1], element 1 is T1[0]; the rest are undef.
define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t11:
; X64: ## BB#0: ## %entry
; X64-NEXT: psrld $16, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
entry:
  %tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
  ret <8 x i16> %tmp7

}

; t12: elements 0-1 are T0[0..1], element 4 is T0[3], element 5 is T1[3]; the
; remaining elements are undef.
define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t12:
; X64: ## BB#0: ## %entry
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq
entry:
  %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
  ret <8 x i16> %tmp9

}

; t13: same mask shape as t12 with the roles of the operands exchanged:
; elements 0-1 are T1[0..1], element 4 is T1[3], element 5 is T0[3].
define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t13:
; X64: ## BB#0: ## %entry
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq
entry:
  %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
  ret <8 x i16> %tmp9
}

; t14: elements 0-1 are T1[0..1] and element 5 is T0[2]; the remaining
; elements are undef.
define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t14:
; X64: ## BB#0: ## %entry
; X64-NEXT: psrlq $16, %xmm0
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
  ret <8 x i16> %tmp9
}

; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
; Element 2 is T0[7], element 3 is T0[2], element 4 is T1[0]; the remaining
; elements are undef.
define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t15:
; X64: ## BB#0: ## %entry
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
entry:
  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
  ret <8 x i16> %tmp8
}

; Test yonah where we convert a shuffle to pextrw and pinsrw
; Two chained byte shuffles: the first places T0[0] in byte 2 of a constant
; vector, the second places T0[1] in byte 3; bytes 0-1 stay from the constant.
define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
; X64-LABEL: t16:
; X64: ## BB#0: ## %entry
; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0]
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
  %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
  ret <16 x i8> %tmp9
}

; rdar://8520311
; Both loads read through an undef pointer. Only the chain
; %tmp3 -> %tmp5 -> %tmp6 -> %tmp7 reaches the return value; %tmp2 and %tmp4
; are computed but not used by it.
define <4 x i32> @t17() nounwind {
; X64-LABEL: t17:
; X64: ## BB#0: ## %entry
; X64-NEXT: movaps (%rax), %xmm0
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
entry:
  %tmp1 = load <4 x float>, <4 x float>* undef, align 16
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %tmp3 = load <4 x float>, <4 x float>* undef, align 16
  %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
  %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
  ret <4 x i32> %tmp7
}