; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/sse41.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X64

; External i16 global; @pmovzxbq_1 loads from it.
@g16 = external global i16

; Inserting a scalar i32 into lane 1 of a <4 x i32> is expected to select a
; single pinsrd. The i686 checks take the scalar straight from its stack slot;
; the x86-64 checks take it from %edi.
define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
; X32-LABEL: pinsrd_1:
; X32:       ## BB#0:
; X32-NEXT:    pinsrd $1, {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: pinsrd_1:
; X64:       ## BB#0:
; X64-NEXT:    pinsrd $1, %edi, %xmm0
; X64-NEXT:    retq
  %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
  ret <4 x i32> %tmp1
}

; Byte-sized counterpart of @pinsrd_1: inserting an i8 into lane 1 of a
; <16 x i8> is expected to select a single pinsrb, with the scalar coming from
; the stack on i686 and from %edi on x86-64.
define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
; X32-LABEL: pinsrb_1:
; X32:       ## BB#0:
; X32-NEXT:    pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: pinsrb_1:
; X64:       ## BB#0:
; X64-NEXT:    pinsrb $1, %edi, %xmm0
; X64-NEXT:    retq
  %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
  ret <16 x i8> %tmp1
}

; Loads an i16 from @g16, places it in lane 0 of an otherwise undef <8 x i16>,
; reinterprets that as <16 x i8> and feeds it to the pmovzxbq intrinsic.
; The expected output first materializes the address of @g16 and then uses
; pmovzxbq with a memory operand (mem[0] / mem[1]), i.e. the scalar load is
; folded into the extend.
define <2 x i64> @pmovzxbq_1() nounwind {
; X32-LABEL: pmovzxbq_1:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl L_g16$non_lazy_ptr, %eax
; X32-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: pmovzxbq_1:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    movq _g16@{{.*}}(%rip), %rax
; X64-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
entry:
	%0 = load i16, i16* @g16, align 2		; <i16> [#uses=1]
	%1 = insertelement <8 x i16> undef, i16 %0, i32 0		; <<8 x i16>> [#uses=1]
	%2 = bitcast <8 x i16> %1 to <16 x i8>		; <<16 x i8>> [#uses=1]
	%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone		; <<2 x i64>> [#uses=1]
	ret <2 x i64> %3
}

; SSE4.1 intrinsic called by @pmovzxbq_1.
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone

; Extracts lane 3 of a <4 x float> and bitcasts the scalar to i32. Because the
; result is wanted in a general-purpose register, both targets are expected to
; use the register form of extractps.
define i32 @extractps_1(<4 x float> %v) nounwind {
; X32-LABEL: extractps_1:
; X32:       ## BB#0:
; X32-NEXT:    extractps $3, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: extractps_1:
; X64:       ## BB#0:
; X64-NEXT:    extractps $3, %xmm0, %eax
; X64-NEXT:    retq
  %s = extractelement <4 x float> %v, i32 3
  %i = bitcast float %s to i32
  ret i32 %i
}
; Same result as @extractps_1 but with the bitcast applied to the whole vector
; before the extract; the expected output is the same single extractps.
define i32 @extractps_2(<4 x float> %v) nounwind {
; X32-LABEL: extractps_2:
; X32:       ## BB#0:
; X32-NEXT:    extractps $3, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: extractps_2:
; X64:       ## BB#0:
; X64-NEXT:    extractps $3, %xmm0, %eax
; X64-NEXT:    retq
  %t = bitcast <4 x float> %v to <4 x i32>
  %s = extractelement <4 x i32> %t, i32 3
  ret i32 %s
}


; The non-store form of extractps puts its result into a GPR.
; This makes it suitable for an extract from a <4 x float> that
; is bitcast to i32, but unsuitable for much of anything else.

; Lane 3 is extracted and then used in a floating-point add, so the value has
; to stay in an XMM register: the expected output moves the lane down with
; shufps and adds the constant with addss instead of using extractps. On i686
; the result is additionally stored to a stack slot and reloaded with flds.
define float @ext_1(<4 x float> %v) nounwind {
; X32-LABEL: ext_1:
; X32:       ## BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X32-NEXT:    addss LCPI5_0, %xmm0
; X32-NEXT:    movss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: ext_1:
; X64:       ## BB#0:
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X64-NEXT:    addss {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %s = extractelement <4 x float> %v, i32 3
  %t = fadd float %s, 1.0
  ret float %t
}
; Returns lane 3 of a <4 x float> as a float. The value is needed in an XMM
; register, so the expected output is a shufps rather than extractps; on i686
; it is then passed through a stack slot and flds.
define float @ext_2(<4 x float> %v) nounwind {
; X32-LABEL: ext_2:
; X32:       ## BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X32-NEXT:    movss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: ext_2:
; X64:       ## BB#0:
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X64-NEXT:    retq
  %s = extractelement <4 x float> %v, i32 3
  ret float %s
}
; Integer counterpart of the tests above: extracting lane 3 of a <4 x i32> into
; an i32 result is expected to select pextrd on both targets.
define i32 @ext_3(<4 x i32> %v) nounwind {
; X32-LABEL: ext_3:
; X32:       ## BB#0:
; X32-NEXT:    pextrd $3, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ext_3:
; X64:       ## BB#0:
; X64-NEXT:    pextrd $3, %xmm0, %eax
; X64-NEXT:    retq
  %i = extractelement <4 x i32> %v, i32 3
  ret i32 %i
}

; Direct use of the insertps intrinsic with immediate 21 (0b00010101). Per the
; expected output, lane 0 of %t2 lands in lane 1 of the result, lanes 0 and 2
; are zeroed, and lane 3 of %t1 is kept.
define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
; X32-LABEL: insertps_1:
; X32:       ## BB#0:
; X32-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
; X32-NEXT:    retl
;
; X64-LABEL: insertps_1:
; X64:       ## BB#0:
; X64-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3]
; X64-NEXT:    retq
  %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 21) nounwind readnone
  ret <4 x float> %tmp1
}

; SSE4.1 intrinsic called by @insertps_1.
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone

; When optimizing for speed, prefer blendps over insertps even if it means we have to
; generate a separate movss to load the scalar operand.
; In the expected output only i686 needs that movss (the float argument is
; loaded from memory); on x86-64 the scalar is already in %xmm1.
define <4 x float> @blendps_not_insertps_1(<4 x float> %t1, float %t2) nounwind {
; X32-LABEL: blendps_not_insertps_1:
; X32:       ## BB#0:
; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: blendps_not_insertps_1:
; X64:       ## BB#0:
; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT:    retq
  %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
  ret <4 x float> %tmp1
}

; When optimizing for size, generate an insertps if there's a load fold opportunity.
; The difference between i386 and x86-64 ABIs for the float operand means we should
; generate an insertps for X32 but not for X64!
; Same IR as @blendps_not_insertps_1 apart from the minsize attribute.
define <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize nounwind {
; X32-LABEL: insertps_or_blendps:
; X32:       ## BB#0:
; X32-NEXT:    insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: insertps_or_blendps:
; X64:       ## BB#0:
; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT:    retq
  %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
  ret <4 x float> %tmp1
}

; An insert into the low 32-bits of a vector from the low 32-bits of another vector
; is always just a blendps because blendps is never more expensive than insertps.
; Here lane 0 of %t2 is extracted and inserted into lane 0 of %t1.
define <4 x float> @blendps_not_insertps_2(<4 x float> %t1, <4 x float> %t2) nounwind {
; X32-LABEL: blendps_not_insertps_2:
; X32:       ## BB#0:
; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: blendps_not_insertps_2:
; X64:       ## BB#0:
; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT:    retq
  %tmp2 = extractelement <4 x float> %t2, i32 0
  %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
  ret <4 x float> %tmp1
}

; Calls the ptestz intrinsic. The expected output zeroes %eax, issues ptest and
; materializes the i32 result with sete.
define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind {
; X32-LABEL: ptestz_1:
; X32:       ## BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    sete %al
; X32-NEXT:    retl
;
; X64-LABEL: ptestz_1:
; X64:       ## BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
  ret i32 %tmp1
}

; NOTE: despite its name, this function calls the ptestc intrinsic, not ptestz.
; The expected output issues ptest and turns the carry flag into the i32 result
; with sbbl followed by andl $1.
define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
; X32-LABEL: ptestz_2:
; X32:       ## BB#0:
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    sbbl %eax, %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    retl
;
; X64-LABEL: ptestz_2:
; X64:       ## BB#0:
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    sbbl %eax, %eax
; X64-NEXT:    andl $1, %eax
; X64-NEXT:    retq
  %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
  ret i32 %tmp1
}

; NOTE: despite its name, this function calls the ptestnzc intrinsic, not
; ptestz. The expected output zeroes %eax, issues ptest and materializes the
; i32 result with seta.
define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
; X32-LABEL: ptestz_3:
; X32:       ## BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    retl
;
; X64-LABEL: ptestz_3:
; X64:       ## BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    retq
  %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
  ret i32 %tmp1
}


; SSE4.1 ptest intrinsics called by @ptestz_1, @ptestz_2 and @ptestz_3.
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone

; This used to compile to insertps $0 + insertps $16. insertps $0 is always
; pointless.
; The function adds two <2 x float> values lane by lane and rebuilds the result
; with insertelement; the expected output contains a single insertps that
; places the lane-1 sum next to the lane-0 sum.
define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind  {
; X32-LABEL: buildvector:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X32-NEXT:    movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X32-NEXT:    addss %xmm1, %xmm0
; X32-NEXT:    addss %xmm2, %xmm3
; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: buildvector:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X64-NEXT:    addss %xmm1, %xmm0
; X64-NEXT:    addss %xmm2, %xmm3
; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; X64-NEXT:    retq
entry:
  %tmp7 = extractelement <2 x float> %A, i32 0
  %tmp5 = extractelement <2 x float> %A, i32 1
  %tmp3 = extractelement <2 x float> %B, i32 0
  %tmp1 = extractelement <2 x float> %B, i32 1
  %add.r = fadd float %tmp7, %tmp3
  %add.i = fadd float %tmp5, %tmp1
  %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
  %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
  ret <2 x float> %tmp9
}

; Shuffle mask <0,1,2,4>: lanes 0-2 come from %a and lane 3 is lane 0 of a
; vector loaded from %pb. The expected output is a single insertps with the
; load folded into its memory operand (i686 first loads the pointer argument
; from the stack).
define <4 x float> @insertps_from_shufflevector_1(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
; X32-LABEL: insertps_from_shufflevector_1:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X32-NEXT:    retl
;
; X64-LABEL: insertps_from_shufflevector_1:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X64-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %pb, align 16
  %vecinit6 = shufflevector <4 x float> %a, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x float> %vecinit6
}

; Shuffle mask <0,1,5,3>: lane 2 of the result is lane 1 of %b, every other
; lane comes from %a. The expected output is a single register-register
; insertps.
define <4 x float> @insertps_from_shufflevector_2(<4 x float> %a, <4 x float> %b) {
; X32-LABEL: insertps_from_shufflevector_2:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3]
; X32-NEXT:    retl
;
; X64-LABEL: insertps_from_shufflevector_2:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3]
; X64-NEXT:    retq
entry:
  %vecinit6 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
  ret <4 x float> %vecinit6
}

; Integer version of @insertps_from_shufflevector_1 (mask <0,1,2,4> with the
; second operand loaded from %pb). As the name suggests, this test was written
; to check that an i32 lane coming from memory uses pinsrd instead of insertps.
; NOTE: the currently expected output contains neither; it is a pshufd with the
; load folded into it, followed by a pblendw of the top two words.
define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocapture readonly %pb) {
; X32-LABEL: pinsrd_from_shufflevector_i32:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pshufd {{.*#+}} xmm1 = mem[0,1,2,0]
; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X32-NEXT:    retl
;
; X64-LABEL: pinsrd_from_shufflevector_i32:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    pshufd {{.*#+}} xmm1 = mem[0,1,2,0]
; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X64-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %pb, align 16
  %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %vecinit6
}

; Shuffle mask <0,7,2,3> on <4 x i32>: lane 1 of the result is lane 3 of %b.
; The expected output rotates %b with pshufd so that the wanted lane sits in
; position 1 and then merges it into %a with pblendw (words 2-3).
define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) {
; X32-LABEL: insertps_from_shufflevector_i32_2:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT:    retl
;
; X64-LABEL: insertps_from_shufflevector_i32_2:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT:    retq
entry:
  %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
  ret <4 x i32> %vecinit6
}

; A float loaded from %b is placed in lane 0 of an undef vector and then
; shuffled into lane 1 of %a (mask <0,4,2,3>). The expected output folds the
; scalar load into a single insertps.
define <4 x float> @insertps_from_load_ins_elt_undef(<4 x float> %a, float* %b) {
; X32-LABEL: insertps_from_load_ins_elt_undef:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: insertps_from_load_ins_elt_undef:
; X64:       ## BB#0:
; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X64-NEXT:    retq
  %1 = load float, float* %b, align 4
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x float> %result
}

; Integer version of @insertps_from_load_ins_elt_undef: an i32 loaded from %b
; ends up in lane 2 of %a (mask <0,1,4,3>). The expected output loads the
; scalar with movd, duplicates it with pshufd and merges it with pblendw.
; TODO: Like on pinsrd_from_shufflevector_i32, remove this mov instr
; (presumably the separate movd load, which is folded into the shuffle in that
; test - confirm before acting on this).
define <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) {
; X32-LABEL: insertps_from_load_ins_elt_undef_i32:
; X32:       ## BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; X32-NEXT:    retl
;
; X64-LABEL: insertps_from_load_ins_elt_undef_i32:
; X64:       ## BB#0:
; X64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; X64-NEXT:    retq
  %1 = load i32, i32* %b, align 4
  %2 = insertelement <4 x i32> undef, i32 %1, i32 0
  %result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  ret <4 x i32> %result
}

;;;;;; Shuffles optimizable with a single insertps or blend instruction

; Rebuilds a vector from lanes 0-2 of %x and a constant 0.0 in lane 3 (%a is
; unused). The expected output zeroes a scratch register with xorps and takes
; lane 3 from it with one blendps.
define <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_XYZ0:
; X32:       ## BB#0:
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; X32-NEXT:    retl
;
; X64-LABEL: shuf_XYZ0:
; X64:       ## BB#0:
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; X64-NEXT:    retq
  %vecext = extractelement <4 x float> %x, i32 0
  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
  %vecext1 = extractelement <4 x float> %x, i32 1
  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
  %vecext3 = extractelement <4 x float> %x, i32 2
  %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2
  %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
  ret <4 x float> %vecinit5
}

; Builds <x[0], x[1], 0.0, 0.0>; %a is not used by the body.
; Both RUN configurations expect a single movq whose decode comment shows the
; low 64-bit element of xmm0 kept and the upper one zeroed.
431*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) {
432*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_XY00:
433*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
434*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
435*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
436*9880d681SAndroid Build Coastguard Worker;
437*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_XY00:
438*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
439*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
440*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
441*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
442*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
443*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %x, i32 1
444*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
445*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2
446*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3
447*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
448*9880d681SAndroid Build Coastguard Worker}
449*9880d681SAndroid Build Coastguard Worker
; Builds <x[0], x[1], x[1], 0.0>: the element extracted from lane 1 is inserted
; into both lane 1 and lane 2. %a is not used by the body.
; Both RUN configurations expect a single insertps.
450*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_XYY0(<4 x float> %x, <4 x float> %a) {
451*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_XYY0:
452*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
453*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
454*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
455*9880d681SAndroid Build Coastguard Worker;
456*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_XYY0:
457*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
458*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
459*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
460*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
461*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
462*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %x, i32 1
463*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
464*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext1, i32 2
465*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
466*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
467*9880d681SAndroid Build Coastguard Worker}
468*9880d681SAndroid Build Coastguard Worker
; Builds <x[0], x[1], x[3], 0.0>; %a is not used by the body.
; Both RUN configurations expect a single insertps.
469*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_XYW0(<4 x float> %x, <4 x float> %a) {
470*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_XYW0:
471*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
472*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
473*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
474*9880d681SAndroid Build Coastguard Worker;
475*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_XYW0:
476*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
477*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
478*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
479*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
480*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
481*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %x, i32 1
482*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
483*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %x, i32 3
484*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float %vecext2, i32 2
485*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3
486*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
487*9880d681SAndroid Build Coastguard Worker}
488*9880d681SAndroid Build Coastguard Worker
; Builds <x[3], 0.0, 0.0, x[3]>: the single extracted lane-3 value is reused for
; lanes 0 and 3. %a is not used by the body.
; Both RUN configurations expect a single insertps.
489*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
490*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_W00W:
491*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
492*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
493*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
494*9880d681SAndroid Build Coastguard Worker;
495*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_W00W:
496*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
497*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
498*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
499*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 3
500*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
501*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float 0.0, i32 1
502*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2
503*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float %vecext, i32 3
504*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
505*9880d681SAndroid Build Coastguard Worker}
506*9880d681SAndroid Build Coastguard Worker
; Builds <x[0], 0.0, 0.0, a[0]>: lanes 0-2 come from insertelement, lane 3 from
; a shufflevector against %a. Both RUN configurations expect a single insertps
; that reads xmm1[0].
507*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
508*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_X00A:
509*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
510*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
511*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
512*9880d681SAndroid Build Coastguard Worker;
513*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_X00A:
514*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
515*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
516*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
517*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
518*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
519*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
520*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2
; Mask index 4 selects lane 0 of the second operand (%a).
521*9880d681SAndroid Build Coastguard Worker  %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
522*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
523*9880d681SAndroid Build Coastguard Worker}
524*9880d681SAndroid Build Coastguard Worker
; Builds <x[0], 0.0, 0.0, x[0]>: lanes 0-2 come from insertelement, lane 3 from
; a shufflevector whose second operand is %x itself. %a is not used by the body.
; Both RUN configurations expect a single insertps.
525*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
526*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_X00X:
527*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
528*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
529*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
530*9880d681SAndroid Build Coastguard Worker;
531*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_X00X:
532*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
533*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
534*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
535*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
536*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
537*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
538*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2
; Mask index 4 selects lane 0 of the second operand (%x).
539*9880d681SAndroid Build Coastguard Worker  %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
540*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
541*9880d681SAndroid Build Coastguard Worker}
542*9880d681SAndroid Build Coastguard Worker
; Builds <x[0], 0.0, x[1], a[2]> using two insertelements followed by two
; shufflevectors. Both RUN configurations expect two insertps instructions:
; the first yields x[0],zero,x[1],zero and the second places xmm1[2] in lane 3.
543*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) {
544*9880d681SAndroid Build Coastguard Worker; X32-LABEL: shuf_X0YC:
545*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
546*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
547*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
548*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
549*9880d681SAndroid Build Coastguard Worker;
550*9880d681SAndroid Build Coastguard Worker; X64-LABEL: shuf_X0YC:
551*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
552*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
553*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
554*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
555*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
556*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
557*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
; Mask index 5 selects lane 1 of the second operand (%x); lane 3 is left undef here.
558*9880d681SAndroid Build Coastguard Worker  %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
; Mask index 6 selects lane 2 of the second operand (%a).
559*9880d681SAndroid Build Coastguard Worker  %vecinit5 = shufflevector <4 x float> %vecinit3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
560*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
561*9880d681SAndroid Build Coastguard Worker}
562*9880d681SAndroid Build Coastguard Worker
; Integer counterpart of the <x[0], x[1], x[2], 0> build; %a is not used by the
; body. Both RUN configurations expect xmm1 to be zeroed with pxor and then a
; single pblendw that takes the top two 16-bit words from it.
563*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
564*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XYZ0:
565*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
566*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pxor %xmm1, %xmm1
567*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
568*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
569*9880d681SAndroid Build Coastguard Worker;
570*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XYZ0:
571*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
572*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pxor %xmm1, %xmm1
573*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
574*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
575*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 0
576*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
577*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x i32> %x, i32 1
578*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
579*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x i32> %x, i32 2
580*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
581*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3
582*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit5
583*9880d681SAndroid Build Coastguard Worker}
584*9880d681SAndroid Build Coastguard Worker
; Builds the integer vector <x[0], x[1], 0, 0>; %a is not used by the body.
; Both RUN configurations expect a single movq whose decode comment shows the
; low 64-bit element of xmm0 kept and the upper one zeroed.
585*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
586*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XY00:
587*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
588*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
589*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
590*9880d681SAndroid Build Coastguard Worker;
591*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XY00:
592*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
593*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
594*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
595*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 0
596*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
597*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x i32> %x, i32 1
598*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
599*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2
600*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3
601*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit4
602*9880d681SAndroid Build Coastguard Worker}
603*9880d681SAndroid Build Coastguard Worker
; Builds the integer vector <x[0], x[1], x[1], 0>; %a is not used by the body.
; Both RUN configurations expect three instructions: pshufd to duplicate lane 1
; into lane 2, pxor to produce zero, and pblendw to zero the top lane.
604*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
605*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XYY0:
606*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
607*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
608*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pxor %xmm0, %xmm0
609*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
610*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
611*9880d681SAndroid Build Coastguard Worker;
612*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XYY0:
613*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
614*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
615*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pxor %xmm0, %xmm0
616*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
617*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
618*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 0
619*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
620*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x i32> %x, i32 1
621*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
622*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext1, i32 2
623*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3
624*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit5
625*9880d681SAndroid Build Coastguard Worker}
626*9880d681SAndroid Build Coastguard Worker
; Builds the integer vector <x[0], x[1], x[3], 0>; %a is not used by the body.
; Both RUN configurations expect three instructions: pshufd to move lane 3 into
; lane 2, pxor to produce zero, and pblendw to zero the top lane.
627*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
628*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_XYW0:
629*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
630*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3]
631*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pxor %xmm0, %xmm0
632*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
633*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
634*9880d681SAndroid Build Coastguard Worker;
635*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_XYW0:
636*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
637*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3]
638*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pxor %xmm0, %xmm0
639*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
640*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
641*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 0
642*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
643*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x i32> %x, i32 1
644*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
645*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x i32> %x, i32 3
646*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %vecext2, i32 2
647*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3
648*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit4
649*9880d681SAndroid Build Coastguard Worker}
650*9880d681SAndroid Build Coastguard Worker
; Builds the integer vector <x[3], 0, 0, x[3]>; %a is not used by the body.
; Both RUN configurations expect three instructions: pshufd to copy lane 3 into
; lane 0, pxor to produce zero, and pblendw to zero the two middle lanes.
651*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
652*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_W00W:
653*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
654*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
655*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pxor %xmm0, %xmm0
656*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
657*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
658*9880d681SAndroid Build Coastguard Worker;
659*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_W00W:
660*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
661*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
662*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pxor %xmm0, %xmm0
663*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
664*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
665*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 3
666*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
667*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x i32> %vecinit, i32 0, i32 1
668*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2
669*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x i32> %vecinit3, i32 %vecext, i32 3
670*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit4
671*9880d681SAndroid Build Coastguard Worker}
672*9880d681SAndroid Build Coastguard Worker
; Builds the integer vector <x[0], 0, 0, a[0]>: lanes 0-2 come from
; insertelement, lane 3 from a shufflevector against %a.
; Both RUN configurations expect four instructions: pxor + pblendw to keep only
; lane 0 of xmm0, pshufd to move a[0] into lane 3 of xmm1, and a final pblendw
; to merge that lane in.
673*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
674*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_X00A:
675*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
676*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pxor %xmm2, %xmm2
677*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
678*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
679*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
680*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
681*9880d681SAndroid Build Coastguard Worker;
682*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_X00A:
683*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
684*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pxor %xmm2, %xmm2
685*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
686*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
687*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
688*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
689*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 0
690*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
691*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
692*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
; Mask index 4 selects lane 0 of the second operand (%a).
693*9880d681SAndroid Build Coastguard Worker  %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
694*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit4
695*9880d681SAndroid Build Coastguard Worker}
696*9880d681SAndroid Build Coastguard Worker
; Builds the integer vector <x[0], 0, 0, x[0]>: lanes 0-2 come from
; insertelement, lane 3 from a shufflevector whose second operand is %x itself.
; %a is not used by the body. Both RUN configurations expect pxor, pshufd
; (copying lane 0 into lane 3) and a pblendw that zeroes the two middle lanes.
697*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
698*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_X00X:
699*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
700*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pxor %xmm1, %xmm1
701*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
702*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
703*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
704*9880d681SAndroid Build Coastguard Worker;
705*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_X00X:
706*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
707*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pxor %xmm1, %xmm1
708*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
709*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
710*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
711*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 0
712*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
713*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
714*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
; Mask index 4 selects lane 0 of the second operand (%x).
715*9880d681SAndroid Build Coastguard Worker  %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
716*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit4
717*9880d681SAndroid Build Coastguard Worker}
718*9880d681SAndroid Build Coastguard Worker
; Builds the integer vector <x[0], 0, x[1], a[2]> using two insertelements
; followed by two shufflevectors. Both RUN configurations expect pmovzxdq
; (giving x[0],zero,x[1],zero), pshufd to move a[2] into lane 3, and a pblendw
; that merges that lane in.
719*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
720*9880d681SAndroid Build Coastguard Worker; X32-LABEL: i32_shuf_X0YC:
721*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
722*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
723*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
724*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
725*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
726*9880d681SAndroid Build Coastguard Worker;
727*9880d681SAndroid Build Coastguard Worker; X64-LABEL: i32_shuf_X0YC:
728*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
729*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
730*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
731*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
732*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
733*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %x, i32 0
734*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
735*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
; Mask index 5 selects lane 1 of the second operand (%x); lane 3 is left undef here.
736*9880d681SAndroid Build Coastguard Worker  %vecinit3 = shufflevector <4 x i32> %vecinit1, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
; Mask index 6 selects lane 2 of the second operand (%a).
737*9880d681SAndroid Build Coastguard Worker  %vecinit5 = shufflevector <4 x i32> %vecinit3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
738*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit5
739*9880d681SAndroid Build Coastguard Worker}
740*9880d681SAndroid Build Coastguard Worker
741*9880d681SAndroid Build Coastguard Worker;; Test for a bug in the first implementation of LowerBuildVectorv4x32
; Builds <x[0], x[1], x[2], 0.0> and then uses the original %x a second time:
; per lane, the result is %x where the built vector compares ordered-less-than
; %x, otherwise the built vector. Both RUN configurations expect the blend to
; write xmm1 (leaving %x intact in xmm0) followed by a maxps.
742*9880d681SAndroid Build Coastguard Workerdefine < 4 x float> @test_insertps_no_undef(<4 x float> %x) {
743*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_insertps_no_undef:
744*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
745*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    xorps %xmm1, %xmm1
746*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
747*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    maxps %xmm1, %xmm0
748*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
749*9880d681SAndroid Build Coastguard Worker;
750*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_insertps_no_undef:
751*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
752*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    xorps %xmm1, %xmm1
753*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
754*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    maxps %xmm1, %xmm0
755*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
756*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
757*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
758*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %x, i32 1
759*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
760*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %x, i32 2
761*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2
762*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
763*9880d681SAndroid Build Coastguard Worker  %mask = fcmp olt <4 x float> %vecinit5, %x
764*9880d681SAndroid Build Coastguard Worker  %res = select  <4 x i1> %mask, <4 x float> %x, <4 x float>%vecinit5
765*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
766*9880d681SAndroid Build Coastguard Worker}
767*9880d681SAndroid Build Coastguard Worker
; Per-lane select between two <8 x i16> vectors driven by an <8 x i1> mask.
; Both RUN configurations expect the mask in xmm0 to be shifted left then
; arithmetically right by 15 bits per 16-bit lane, followed by pblendvb into
; xmm2 and a movdqa of the result back to xmm0.
768*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) {
769*9880d681SAndroid Build Coastguard Worker; X32-LABEL: blendvb_fallback:
770*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
771*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    psllw $15, %xmm0
772*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    psraw $15, %xmm0
773*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendvb %xmm1, %xmm2
774*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movdqa %xmm2, %xmm0
775*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
776*9880d681SAndroid Build Coastguard Worker;
777*9880d681SAndroid Build Coastguard Worker; X64-LABEL: blendvb_fallback:
778*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
779*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    psllw $15, %xmm0
780*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    psraw $15, %xmm0
781*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendvb %xmm1, %xmm2
782*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movdqa %xmm2, %xmm0
783*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
784*9880d681SAndroid Build Coastguard Worker  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
785*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %ret
786*9880d681SAndroid Build Coastguard Worker}
787*9880d681SAndroid Build Coastguard Worker
788*9880d681SAndroid Build Coastguard Worker; On X32, account for the argument's move to registers
; Loads a full <4 x float> from %pb (16-byte aligned) and passes it to the
; llvm.x86.sse41.insertps intrinsic with immediate 48. Both RUN configurations
; expect the load to be folded into the insertps memory operand, with the decode
; comment showing mem[0] landing in lane 3; the 32-bit variant first loads the
; pointer argument from the stack.
789*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
790*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_vector_load:
791*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
792*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
793*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps    $48, (%{{...}}), {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
794*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
795*9880d681SAndroid Build Coastguard Worker;
796*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_vector_load:
797*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
798*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps    $48, (%{{...}}), {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
799*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
800*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x float>, <4 x float>* %pb, align 16
801*9880d681SAndroid Build Coastguard Worker  %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
802*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %2
803*9880d681SAndroid Build Coastguard Worker}
804*9880d681SAndroid Build Coastguard Worker
; insertps_from_vector_load_offset: same shape as insertps_from_vector_load,
; but the intrinsic immediate is 96 (0x60: source element 1, destination
; lane 2, empty zero mask).
; The CHECK lines expect the folded form to carry the source selection as a
; 4-byte displacement on the memory operand and to keep only the destination
; lane in the immediate ($32), giving xmm0 = xmm0[0,1],mem[0],xmm0[3].
805*9880d681SAndroid Build Coastguard Worker;; Use a non-zero CountS for insertps
806*9880d681SAndroid Build Coastguard Worker;; Try to match a bit more of the instr, since we need the load's offset.
807*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
808*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_vector_load_offset:
809*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
810*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
811*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps    $32, 4(%{{...}}), {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
812*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
813*9880d681SAndroid Build Coastguard Worker;
814*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_vector_load_offset:
815*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
816*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps    $32, 4(%{{...}}), {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
817*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
818*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x float>, <4 x float>* %pb, align 16
819*9880d681SAndroid Build Coastguard Worker  %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
820*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %2
821*9880d681SAndroid Build Coastguard Worker}
822*9880d681SAndroid Build Coastguard Worker
; insertps_from_vector_load_offset_2: the vector is loaded from %pb[%index]
; (a getelementptr over <4 x float>, so each step is 16 bytes) and inserted
; with immediate 192 (0xC0: source element 3, destination lane 0).
; The CHECK lines expect the index to be scaled with a shift-left by 4, the
; source selection to appear as a 12-byte displacement on a base+index memory
; operand, and the immediate to become $0, giving xmm0 = mem[0],xmm0[1,2,3].
823*9880d681SAndroid Build Coastguard Worker;; Try to match a bit more of the instr, since we need the load's offset.
824*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) {
825*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_vector_load_offset_2:
826*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
827*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
828*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
829*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    shll $4, %ecx
830*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps    $0, 12(%{{...}},%{{...}}), {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
831*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
832*9880d681SAndroid Build Coastguard Worker;
833*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_vector_load_offset_2:
834*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
835*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    shlq $4, %rsi
836*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps    $0, 12(%{{...}},%{{...}}), {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
837*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
838*9880d681SAndroid Build Coastguard Worker  %1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
839*9880d681SAndroid Build Coastguard Worker  %2 = load <4 x float>, <4 x float>* %1, align 16
840*9880d681SAndroid Build Coastguard Worker  %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
841*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %3
842*9880d681SAndroid Build Coastguard Worker}
843*9880d681SAndroid Build Coastguard Worker
; insertps_from_broadcast_loadf32: loads one float from %fb[%index], splats it
; into all four lanes with insertelement, and feeds that splat to
; @llvm.x86.sse41.insertps with immediate 48.
; The CHECK lines expect the scalar load to be folded directly into a single
; insertps with a memory operand (xmm0 = xmm0[0,1,2],mem[0]); no separate
; broadcast is expected. On X32 two stack arguments are loaded first.
844*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocapture readonly %fb, i64 %index) {
845*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_broadcast_loadf32:
846*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
847*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
848*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
849*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
850*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
851*9880d681SAndroid Build Coastguard Worker;
852*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_broadcast_loadf32:
853*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
854*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
855*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
856*9880d681SAndroid Build Coastguard Worker  %1 = getelementptr inbounds float, float* %fb, i64 %index
857*9880d681SAndroid Build Coastguard Worker  %2 = load float, float* %1, align 4
858*9880d681SAndroid Build Coastguard Worker  %3 = insertelement <4 x float> undef, float %2, i32 0
859*9880d681SAndroid Build Coastguard Worker  %4 = insertelement <4 x float> %3, float %2, i32 1
860*9880d681SAndroid Build Coastguard Worker  %5 = insertelement <4 x float> %4, float %2, i32 2
861*9880d681SAndroid Build Coastguard Worker  %6 = insertelement <4 x float> %5, float %2, i32 3
862*9880d681SAndroid Build Coastguard Worker  %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
863*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %7
864*9880d681SAndroid Build Coastguard Worker}
865*9880d681SAndroid Build Coastguard Worker
; insertps_from_broadcast_loadv4f32: loads a <4 x float> from %b (align 4),
; extracts element 0, splats it into all four lanes, and feeds the splat to
; @llvm.x86.sse41.insertps with immediate 48.
; The CHECK lines expect the same folded result as the scalar-load variant:
; a single insertps with a memory operand, xmm0 = xmm0[0,1,2],mem[0].
866*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float>* nocapture readonly %b) {
867*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_broadcast_loadv4f32:
868*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
869*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
870*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
871*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
872*9880d681SAndroid Build Coastguard Worker;
873*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_broadcast_loadv4f32:
874*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
875*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
876*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
877*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x float>, <4 x float>* %b, align 4
878*9880d681SAndroid Build Coastguard Worker  %2 = extractelement <4 x float> %1, i32 0
879*9880d681SAndroid Build Coastguard Worker  %3 = insertelement <4 x float> undef, float %2, i32 0
880*9880d681SAndroid Build Coastguard Worker  %4 = insertelement <4 x float> %3, float %2, i32 1
881*9880d681SAndroid Build Coastguard Worker  %5 = insertelement <4 x float> %4, float %2, i32 2
882*9880d681SAndroid Build Coastguard Worker  %6 = insertelement <4 x float> %5, float %2, i32 3
883*9880d681SAndroid Build Coastguard Worker  %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
884*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %7
885*9880d681SAndroid Build Coastguard Worker}
886*9880d681SAndroid Build Coastguard Worker
; insertps_from_broadcast_multiple_use: the splat of the float loaded from
; %fb[%index] is used by four insertps calls (into %a, %b, %c and %d, each
; with immediate 48), and the four results are summed with three fadds.
; Because the loaded value has several users, the CHECK lines expect one
; movss load into xmm4 followed by four register-to-register insertps
; instructions (xmmN = xmmN[0,1,2],xmm4[0]) rather than four folded loads,
; and then three addps.
887*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) {
888*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_from_broadcast_multiple_use:
889*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
890*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
891*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
892*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
893*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
894*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
895*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0]
896*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
897*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    addps %xmm1, %xmm0
898*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    addps %xmm2, %xmm3
899*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    addps %xmm3, %xmm0
900*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
901*9880d681SAndroid Build Coastguard Worker;
902*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_from_broadcast_multiple_use:
903*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
904*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
905*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
906*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
907*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0]
908*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
909*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    addps %xmm1, %xmm0
910*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    addps %xmm2, %xmm3
911*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    addps %xmm3, %xmm0
912*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
913*9880d681SAndroid Build Coastguard Worker  %1 = getelementptr inbounds float, float* %fb, i64 %index
914*9880d681SAndroid Build Coastguard Worker  %2 = load float, float* %1, align 4
915*9880d681SAndroid Build Coastguard Worker  %3 = insertelement <4 x float> undef, float %2, i32 0
916*9880d681SAndroid Build Coastguard Worker  %4 = insertelement <4 x float> %3, float %2, i32 1
917*9880d681SAndroid Build Coastguard Worker  %5 = insertelement <4 x float> %4, float %2, i32 2
918*9880d681SAndroid Build Coastguard Worker  %6 = insertelement <4 x float> %5, float %2, i32 3
919*9880d681SAndroid Build Coastguard Worker  %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
920*9880d681SAndroid Build Coastguard Worker  %8 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %b, <4 x float> %6, i32 48)
921*9880d681SAndroid Build Coastguard Worker  %9 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %c, <4 x float> %6, i32 48)
922*9880d681SAndroid Build Coastguard Worker  %10 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %d, <4 x float> %6, i32 48)
923*9880d681SAndroid Build Coastguard Worker  %11 = fadd <4 x float> %7, %8
924*9880d681SAndroid Build Coastguard Worker  %12 = fadd <4 x float> %9, %10
925*9880d681SAndroid Build Coastguard Worker  %13 = fadd <4 x float> %11, %12
926*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %13
927*9880d681SAndroid Build Coastguard Worker}
928*9880d681SAndroid Build Coastguard Worker
; insertps_with_undefs: a float loaded from %b is placed in lane 0 of an
; otherwise-undef vector, then shuffled with %a using mask <4, undef, 0, 7>.
; Result lane 0 is the loaded float, lane 2 is %a[0], lane 1 is undef, and
; lane 3 comes from an undef lane of the inserted vector.
; Despite the test name, the CHECK lines do not expect an insertps here: they
; expect movss (load), unpcklpd (combine the low 64-bit halves), and movapd to
; move the result into xmm0.
929*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
930*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_with_undefs:
931*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
932*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
933*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
934*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
935*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movapd %xmm1, %xmm0
936*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
937*9880d681SAndroid Build Coastguard Worker;
938*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_with_undefs:
939*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
940*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
941*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
942*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movapd %xmm1, %xmm0
943*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
944*9880d681SAndroid Build Coastguard Worker  %1 = load float, float* %b, align 4
945*9880d681SAndroid Build Coastguard Worker  %2 = insertelement <4 x float> undef, float %1, i32 0
946*9880d681SAndroid Build Coastguard Worker  %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
947*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %result
948*9880d681SAndroid Build Coastguard Worker}
949*9880d681SAndroid Build Coastguard Worker
; pr20087: shuffles the loaded vector (first operand) with %a (second operand)
; using mask <4, undef, 6, 2>, so result lanes 0 and 2 come from %a and
; result lane 3 takes element 2 of the loaded vector. The source index (2) and
; the destination index (3) therefore differ, which is the situation the
; regression described below depends on.
; The CHECK lines expect one insertps with the load folded in:
; xmm0 = xmm0[0],zero,xmm0[2],mem[0].
950*9880d681SAndroid Build Coastguard Worker; Test for a bug in X86ISelLowering.cpp:getINSERTPS where we were using
951*9880d681SAndroid Build Coastguard Worker; the destination index to change the load, instead of the source index.
952*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
953*9880d681SAndroid Build Coastguard Worker; X32-LABEL: pr20087:
954*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
955*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
956*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[0]
957*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
958*9880d681SAndroid Build Coastguard Worker;
959*9880d681SAndroid Build Coastguard Worker; X64-LABEL: pr20087:
960*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
961*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[0]
962*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
963*9880d681SAndroid Build Coastguard Worker  %load = load <4 x float> , <4 x float> *%ptr
964*9880d681SAndroid Build Coastguard Worker  %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2>
965*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %ret
966*9880d681SAndroid Build Coastguard Worker}
967*9880d681SAndroid Build Coastguard Worker
; insertps_pr20411: integer (<4 x i32>) shuffle of %shuffle109 and %shuffle116
; with mask <0, 7, undef, undef>; the result is stored through %RET, bitcast
; to a <4 x i32>* and written with align 4.
; The CHECK lines expect pshufd + pblendw followed by an unaligned movdqu
; store, i.e. no insertps is expected in the output.
; NOTE(review): attribute group #1 is referenced here but is not defined in
; this part of the file - confirm it is declared elsewhere.
968*9880d681SAndroid Build Coastguard Worker; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
969*9880d681SAndroid Build Coastguard Workerdefine void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 {
970*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_pr20411:
971*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
972*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
973*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
974*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
975*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movdqu %xmm1, (%eax)
976*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
977*9880d681SAndroid Build Coastguard Worker;
978*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_pr20411:
979*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
980*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
981*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
982*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movdqu %xmm1, (%rdi)
983*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
984*9880d681SAndroid Build Coastguard Worker  %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
985*9880d681SAndroid Build Coastguard Worker  %ptrcast = bitcast i32* %RET to <4 x i32>*
986*9880d681SAndroid Build Coastguard Worker  store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
987*9880d681SAndroid Build Coastguard Worker  ret void
988*9880d681SAndroid Build Coastguard Worker}
989*9880d681SAndroid Build Coastguard Worker
; insertps_4: builds the vector <A[0], 0.0, B[2], 0.0> with an
; extractelement/insertelement chain.
; The CHECK lines expect the whole chain to become one insertps:
; xmm0 = xmm0[0],zero,xmm1[2],zero.
990*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_4(<4 x float> %A, <4 x float> %B) {
991*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_4:
992*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
993*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
994*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
995*9880d681SAndroid Build Coastguard Worker;
996*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_4:
997*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
998*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
999*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1000*9880d681SAndroid Build Coastguard Workerentry:
1001*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 0
1002*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
1003*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
1004*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %B, i32 2
1005*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2
1006*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
1007*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
1008*9880d681SAndroid Build Coastguard Worker}
1009*9880d681SAndroid Build Coastguard Worker
; insertps_5: builds the vector <A[0], B[1], 0.0, 0.0> with an
; extractelement/insertelement chain.
; The CHECK lines expect one insertps: xmm0 = xmm0[0],xmm1[1],zero,zero.
1010*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_5(<4 x float> %A, <4 x float> %B) {
1011*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_5:
1012*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
1013*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
1014*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1015*9880d681SAndroid Build Coastguard Worker;
1016*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_5:
1017*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
1018*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
1019*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1020*9880d681SAndroid Build Coastguard Workerentry:
1021*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 0
1022*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
1023*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %B, i32 1
1024*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
1025*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2
1026*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
1027*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
1028*9880d681SAndroid Build Coastguard Worker}
1029*9880d681SAndroid Build Coastguard Worker
; insertps_6: starts from <0.0, undef, undef, undef> and builds
; <0.0, A[1], B[2], 0.0>.
; The CHECK lines expect one insertps: xmm0 = zero,xmm0[1],xmm1[2],zero.
1030*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_6(<4 x float> %A, <4 x float> %B) {
1031*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_6:
1032*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
1033*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
1034*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1035*9880d681SAndroid Build Coastguard Worker;
1036*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_6:
1037*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
1038*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
1039*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1040*9880d681SAndroid Build Coastguard Workerentry:
1041*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 1
1042*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1
1043*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %B, i32 2
1044*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2
1045*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3
1046*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit3
1047*9880d681SAndroid Build Coastguard Worker}
1048*9880d681SAndroid Build Coastguard Worker
; insertps_7: builds <A[0], 0.0, B[1], 0.0>. Unlike insertps_4, the element
; taken from %B (index 1) lands in a different lane (2) of the result.
; The CHECK lines expect one insertps: xmm0 = xmm0[0],zero,xmm1[1],zero.
1049*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_7(<4 x float> %A, <4 x float> %B) {
1050*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_7:
1051*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
1052*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
1053*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1054*9880d681SAndroid Build Coastguard Worker;
1055*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_7:
1056*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
1057*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
1058*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1059*9880d681SAndroid Build Coastguard Workerentry:
1060*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 0
1061*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
1062*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
1063*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %B, i32 1
1064*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2
1065*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
1066*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
1067*9880d681SAndroid Build Coastguard Worker}
1068*9880d681SAndroid Build Coastguard Worker
; insertps_8: builds <A[0], B[0], 0.0, 0.0>; element 0 of %B is moved into
; lane 1 of the result.
; The CHECK lines expect one insertps: xmm0 = xmm0[0],xmm1[0],zero,zero.
1069*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_8(<4 x float> %A, <4 x float> %B) {
1070*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_8:
1071*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
1072*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1073*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1074*9880d681SAndroid Build Coastguard Worker;
1075*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_8:
1076*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
1077*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1078*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1079*9880d681SAndroid Build Coastguard Workerentry:
1080*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 0
1081*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
1082*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %B, i32 0
1083*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
1084*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2
1085*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
1086*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
1087*9880d681SAndroid Build Coastguard Worker}
1088*9880d681SAndroid Build Coastguard Worker
; insertps_9: starts from <0.0, undef, undef, undef> and builds
; <0.0, A[0], B[2], 0.0>; element 0 of %A is moved into lane 1.
; The CHECK lines expect the insertps to write into xmm1
; (xmm1 = zero,xmm0[0],xmm1[2],zero), followed by a movaps that copies the
; result into the return register xmm0.
1089*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_9(<4 x float> %A, <4 x float> %B) {
1090*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_9:
1091*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
1092*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
1093*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movaps %xmm1, %xmm0
1094*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1095*9880d681SAndroid Build Coastguard Worker;
1096*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_9:
1097*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
1098*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
1099*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movaps %xmm1, %xmm0
1100*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1101*9880d681SAndroid Build Coastguard Workerentry:
1102*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 0
1103*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1
1104*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %B, i32 2
1105*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2
1106*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3
1107*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit3
1108*9880d681SAndroid Build Coastguard Worker}
1109*9880d681SAndroid Build Coastguard Worker
; insertps_10: single-input case. Starting from an all-zero vector, A[0] is
; inserted into lanes 0 and 2, giving <A[0], 0.0, A[0], 0.0>.
; The CHECK lines expect one insertps that reads and writes xmm0:
; xmm0 = xmm0[0],zero,xmm0[0],zero.
; NOTE(review): the opening '{' of this function sits after the CHECK lines
; and the body has no 'entry:' label, unlike the neighbouring tests; this is
; why the expected block comment is '## BB#0:' without '## %entry'.
1110*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_10(<4 x float> %A)
1111*9880d681SAndroid Build Coastguard Worker; X32-LABEL: insertps_10:
1112*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0:
1113*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
1114*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1115*9880d681SAndroid Build Coastguard Worker;
1116*9880d681SAndroid Build Coastguard Worker; X64-LABEL: insertps_10:
1117*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0:
1118*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
1119*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1120*9880d681SAndroid Build Coastguard Worker{
1121*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 0
1122*9880d681SAndroid Build Coastguard Worker  %vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0
1123*9880d681SAndroid Build Coastguard Worker  %vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2
1124*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecbuild2
1125*9880d681SAndroid Build Coastguard Worker}
1126*9880d681SAndroid Build Coastguard Worker
; build_vector_to_shuffle_1: starts from zeroinitializer, inserts A[1] into
; lane 1 and 0.0 into lane 2, then shuffles with %A using mask <0, 1, 2, 7>
; so that lane 3 becomes A[3]. The result is <0.0, A[1], 0.0, A[3]>: every
; element of %A that is kept stays in its own lane.
; The CHECK lines expect a zeroed register (xorps) blended with xmm0 by one
; blendps: xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3].
1127*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
1128*9880d681SAndroid Build Coastguard Worker; X32-LABEL: build_vector_to_shuffle_1:
1129*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
1130*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    xorps %xmm1, %xmm1
1131*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1132*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1133*9880d681SAndroid Build Coastguard Worker;
1134*9880d681SAndroid Build Coastguard Worker; X64-LABEL: build_vector_to_shuffle_1:
1135*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
1136*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    xorps %xmm1, %xmm1
1137*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1138*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1139*9880d681SAndroid Build Coastguard Workerentry:
1140*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 1
1141*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
1142*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
1143*9880d681SAndroid Build Coastguard Worker  %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1144*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit3
1145*9880d681SAndroid Build Coastguard Worker}
1146*9880d681SAndroid Build Coastguard Worker
; build_vector_to_shuffle_2: same construction as build_vector_to_shuffle_1
; but without the final shufflevector, so the result is
; <0.0, A[1], 0.0, 0.0>.
; The CHECK lines expect a zeroed register (xorps) blended with xmm0 by one
; blendps: xmm0 = xmm1[0],xmm0[1],xmm1[2,3].
1147*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) {
1148*9880d681SAndroid Build Coastguard Worker; X32-LABEL: build_vector_to_shuffle_2:
1149*9880d681SAndroid Build Coastguard Worker; X32:       ## BB#0: ## %entry
1150*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    xorps %xmm1, %xmm1
1151*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1152*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1153*9880d681SAndroid Build Coastguard Worker;
1154*9880d681SAndroid Build Coastguard Worker; X64-LABEL: build_vector_to_shuffle_2:
1155*9880d681SAndroid Build Coastguard Worker; X64:       ## BB#0: ## %entry
1156*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    xorps %xmm1, %xmm1
1157*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1158*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1159*9880d681SAndroid Build Coastguard Workerentry:
1160*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 1
1161*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
1162*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
1163*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit1
1164*9880d681SAndroid Build Coastguard Worker}
1165