; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx512-mov.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s

; f32 -> i32 bitcast: expect EVEX-encoded vmovd (xmm0 -> eax).
define i32 @test1(float %x) {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = bitcast float %x to i32
   ret i32 %res
}

; i32 insert into undef vector: expect EVEX vmovd (edi -> xmm0).
define <4 x i32> @test2(i32 %x) {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <4 x i32>undef, i32 %x, i32 0
   ret <4 x i32>%res
}

; i64 insert into undef vector: expect EVEX vmovq (rdi -> xmm0).
define <2 x i64> @test3(i64 %x) {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <2 x i64>undef, i64 %x, i32 0
   ret <2 x i64>%res
}

; i32 load inserted into undef vector: expect EVEX vmovd from memory.
define <4 x i32> @test4(i32* %x) {
; CHECK-LABEL: test4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = insertelement <4 x i32>undef, i32 %y, i32 0
   ret <4 x i32>%res
}

; Scalar f32 store: expect EVEX vmovss to memory.
define void @test5(float %x, float* %y) {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   store float %x, float* %y, align 4
   ret void
}

; Scalar f64 store: expect EVEX vmovsd to memory.
define void @test6(double %x, double* %y) {
; CHECK-LABEL: test6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   store double %x, double* %y, align 8
   ret void
}

; i32 load bitcast to f32: expect EVEX vmovss load.
define float @test7(i32* %x) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = bitcast i32 %y to float
   ret float %res
}

; Extract element 0 of <4 x i32>: expect EVEX vmovd (xmm0 -> eax).
define i32 @test8(<4 x i32> %x) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <4 x i32> %x, i32 0
   ret i32 %res
}

; Extract element 0 of <2 x i64>: expect EVEX vmovq (xmm0 -> rax).
define i64 @test9(<2 x i64> %x) {
; CHECK-LABEL: test9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %xmm0, %rax ## encoding: [0x62,0xf1,0xfd,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <2 x i64> %x, i32 0
   ret i64 %res
}

; i32 load inserted into zero vector: expect EVEX vmovd (zero-extending load).
define <4 x i32> @test10(i32* %x) {
; CHECK-LABEL: test10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x, align 4
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
}

; f32 load inserted into zero vector: expect EVEX vmovss load.
define <4 x float> @test11(float* %x) {
; CHECK-LABEL: test11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load float, float* %x, align 4
   %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
   ret <4 x float>%res
}

; f64 load inserted into zero vector: expect EVEX vmovsd load.
define <2 x double> @test12(double* %x) {
; CHECK-LABEL: test12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load double, double* %x, align 8
   %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
   ret <2 x double>%res
}

; i64 insert into zero vector: expect EVEX vmovq (implicitly zeroes upper lane).
define <2 x i64> @test13(i64 %x) {
; CHECK-LABEL: test13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
   ret <2 x i64>%res
}

; i32 insert into zero vector: expect EVEX vmovd (implicitly zeroes upper lanes).
define <4 x i32> @test14(i32 %x) {
; CHECK-LABEL: test14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
   ret <4 x i32>%res
}

; Same as test10 but with explicit align 4 on the load: still EVEX vmovd.
define <4 x i32> @test15(i32* %x) {
; CHECK-LABEL: test15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x, align 4
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
}

; Unaligned <16 x i32> zmm load: expect vmovdqu32.
define <16 x i32> @test16(i8 * %addr) {
; CHECK-LABEL: test16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
  ret <16 x i32>%res
}

; 64-byte-aligned <16 x i32> zmm load: expect vmovdqa32.
define <16 x i32> @test17(i8 * %addr) {
; CHECK-LABEL: test17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
  ret <16 x i32>%res
}

; 64-byte-aligned <8 x i64> zmm store: expect vmovdqa64.
define void @test18(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
  ret void
}

; Unaligned <16 x i32> zmm store: expect vmovdqu32.
define void @test19(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 1
  ret void
}

; 64-byte-aligned <16 x i32> zmm store: expect vmovdqa32.
define void @test20(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 64
  ret void
}

; 64-byte-aligned <8 x i64> zmm load: expect vmovdqa64.
define  <8 x i64> @test21(i8 * %addr) {
; CHECK-LABEL: test21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
  ret <8 x i64>%res
}

; Unaligned <8 x i64> zmm store: expect vmovdqu64.
define void @test22(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 1
  ret void
}

; Unaligned <8 x i64> zmm load: expect vmovdqu64.
define <8 x i64> @test23(i8 * %addr) {
; CHECK-LABEL: test23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
  ret <8 x i64>%res
}

; 64-byte-aligned <8 x double> zmm store: expect vmovapd.
define void @test24(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 64
  ret void
}

; 64-byte-aligned <8 x double> zmm load: expect vmovapd.
define <8 x double> @test25(i8 * %addr) {
; CHECK-LABEL: test25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 64
  ret <8 x double>%res
}

; 64-byte-aligned <16 x float> zmm store: expect vmovaps.
define void @test26(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 64
  ret void
}

; 64-byte-aligned <16 x float> zmm load: expect vmovaps.
define <16 x float> @test27(i8 * %addr) {
; CHECK-LABEL: test27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 64
  ret <16 x float>%res
}

; Unaligned <8 x double> zmm store: expect vmovupd.
define void @test28(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 1
  ret void
}

; Unaligned <8 x double> zmm load: expect vmovupd.
define <8 x double> @test29(i8 * %addr) {
; CHECK-LABEL: test29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 1
  ret <8 x double>%res
}

; Unaligned <16 x float> zmm store: expect vmovups.
define void @test30(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 1
  ret void
}

; Unaligned <16 x float> zmm load: expect vmovups.
define <16 x float> @test31(i8 * %addr) {
; CHECK-LABEL: test31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 1
  ret <16 x float>%res
}

; Masked merge of aligned i32 load with %old: expect vpcmpneqd + vpblendmd.
define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

; Same as test32 but with an unaligned load: still folds into vpblendmd.
define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test33:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

; Zero-masked aligned i32 load: expect vmovdqa32 {%k1} {z}.
define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test34:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

; Zero-masked unaligned i32 load: expect vmovdqu32 {%k1} {z}.
define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test35:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

; Masked merge of aligned i64 load with %old: expect vpcmpneqq + vpblendmq.
define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test36:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

; Same as test36 but with an unaligned load: still folds into vpblendmq.
define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test37:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

; Zero-masked aligned i64 load: expect vmovdqa64 {%k1} {z}.
define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test38:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

; Zero-masked unaligned i64 load: expect vmovdqu64 {%k1} {z}.
define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test39:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

; fcmp one mask merging an aligned f32 load: ord+neq compares, then vblendmps.
define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test40:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

; Same as test40 but with an unaligned load: still folds into vblendmps.
define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test41:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

; Zero-masked aligned f32 load under fcmp one mask: expect vmovaps {%k1} {z}.
define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test42:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

; Zero-masked unaligned f32 load under fcmp one mask: expect vmovups {%k1} {z}.
define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test43:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

; fcmp one mask merging an aligned f64 load: ord+neq compares, then vblendmpd.
define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test44:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

; Same as test44 but with an unaligned load: still folds into vblendmpd.
define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test45:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

; Zero-masked aligned f64 load under fcmp one mask: expect vmovapd {%k1} {z}.
define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test46:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}

; Zero-masked unaligned f64 load under fcmp one mask: expect vmovupd {%k1} {z}.
define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test47:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}
