// Origin: /aosp_15_r20/external/clang/test/CodeGen/sse2-builtins.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

// Expects _mm_add_epi8 to lower to a plain `add <16 x i8>`.
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}

// Expects _mm_add_epi16 to lower to a plain `add <8 x i16>`.
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}

// Expects _mm_add_epi32 to lower to a plain `add <4 x i32>`.
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}

// Expects _mm_add_epi64 to lower to a plain `add <2 x i64>`.
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}

// Expects _mm_add_pd to lower to a plain `fadd <2 x double>`.
__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}

// Expects _mm_add_sd to extract lane 0 of both operands, fadd the scalars,
// and insert the sum back into lane 0.
__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fadd double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_add_sd(A, B);
}

// Expects _mm_adds_epi8 to call the llvm.x86.sse2.padds.b intrinsic.
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epi8(A, B);
}

// Expects _mm_adds_epi16 to call the llvm.x86.sse2.padds.w intrinsic.
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epi16(A, B);
}

// Expects _mm_adds_epu8 to call the llvm.x86.sse2.paddus.b intrinsic.
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_adds_epu8(A, B);
}

// Expects _mm_adds_epu16 to call the llvm.x86.sse2.paddus.w intrinsic.
__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_adds_epu16(A, B);
}

// Expects _mm_and_pd to lower to an integer `and <4 x i32>`.
__m128d test_mm_and_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_and_pd
  // CHECK: and <4 x i32>
  return _mm_and_pd(A, B);
}

// Expects _mm_and_si128 to lower to `and <2 x i64>`.
__m128i test_mm_and_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_and_si128
  // CHECK: and <2 x i64>
  return _mm_and_si128(A, B);
}

// Expects _mm_andnot_pd to lower to an xor with all-ones followed by
// `and <4 x i32>`.
__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_andnot_pd
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_pd(A, B);
}

// Expects _mm_andnot_si128 to lower to an xor with all-ones followed by
// `and <2 x i64>`.
__m128i test_mm_andnot_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_andnot_si128
  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
  // CHECK: and <2 x i64>
  return _mm_andnot_si128(A, B);
}

// Expects _mm_avg_epu8 to call the llvm.x86.sse2.pavg.b intrinsic.
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_avg_epu8(A, B);
}

// Expects _mm_avg_epu16 to call the llvm.x86.sse2.pavg.w intrinsic.
__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_avg_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_avg_epu16(A, B);
}

// Expects _mm_bslli_si128(A, 5) to become one byte shufflevector with
// indices 11..26.
__m128i test_mm_bslli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_bslli_si128(A, 5);
}

// Expects _mm_bsrli_si128(A, 5) to become one byte shufflevector with
// indices 5..20.
__m128i test_mm_bsrli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_bsrli_si128(A, 5);
}

// Expects _mm_castpd_ps to be a bitcast from <2 x double> to <4 x float>.
__m128 test_mm_castpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_ps
  // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
  return _mm_castpd_ps(A);
}

// Expects _mm_castpd_si128 to be a bitcast from <2 x double> to <2 x i64>.
__m128i test_mm_castpd_si128(__m128d A) {
  // CHECK-LABEL: test_mm_castpd_si128
  // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
  return _mm_castpd_si128(A);
}

// Expects _mm_castps_pd to be a bitcast from <4 x float> to <2 x double>.
__m128d test_mm_castps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_castps_pd
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
  return _mm_castps_pd(A);
}

// Expects _mm_castps_si128 to be a bitcast from <4 x float> to <2 x i64>.
__m128i test_mm_castps_si128(__m128 A) {
  // CHECK-LABEL: test_mm_castps_si128
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  return _mm_castps_si128(A);
}

// Expects _mm_castsi128_pd to be a bitcast from <2 x i64> to <2 x double>.
__m128d test_mm_castsi128_pd(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_pd
  // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
  return _mm_castsi128_pd(A);
}

// Expects _mm_castsi128_ps to be a bitcast from <2 x i64> to <4 x float>.
__m128 test_mm_castsi128_ps(__m128i A) {
  // CHECK-LABEL: test_mm_castsi128_ps
  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
  return _mm_castsi128_ps(A);
}

// Expects _mm_clflush to call the llvm.x86.sse2.clflush intrinsic with an
// i8* argument.
void test_mm_clflush(void* A) {
  // CHECK-LABEL: test_mm_clflush
  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
  _mm_clflush(A);
}

// Expects _mm_cmpeq_epi8 to lower to `icmp eq <16 x i8>`.
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi8
  // CHECK: icmp eq <16 x i8>
  return _mm_cmpeq_epi8(A, B);
}

// Expects _mm_cmpeq_epi16 to lower to `icmp eq <8 x i16>`.
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi16
  // CHECK: icmp eq <8 x i16>
  return _mm_cmpeq_epi16(A, B);
}

// Expects _mm_cmpeq_epi32 to lower to `icmp eq <4 x i32>`.
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi32
  // CHECK: icmp eq <4 x i32>
  return _mm_cmpeq_epi32(A, B);
}

// Expects _mm_cmpeq_pd to emit `fcmp oeq <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_pd
  // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpeq_pd(A, B);
}

// Expects _mm_cmpeq_sd to call llvm.x86.sse2.cmp.sd with predicate i8 0.
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpeq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(A, B);
}

// Expects _mm_cmpge_pd to emit `fcmp ole <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpge_pd(A, B);
}

// Expects _mm_cmpge_sd to call llvm.x86.sse2.cmp.sd with predicate i8 2,
// then rebuild the result with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpge_sd(A, B);
}

// Expects _mm_cmpgt_epi8 to lower to `icmp sgt <16 x i8>`.
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmpgt_epi8(A, B);
}

// Expects _mm_cmpgt_epi16 to lower to `icmp sgt <8 x i16>`.
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmpgt_epi16(A, B);
}

// Expects _mm_cmpgt_epi32 to lower to `icmp sgt <4 x i32>`.
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpgt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmpgt_epi32(A, B);
}

// Expects _mm_cmpgt_pd to emit `fcmp olt <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpgt_pd(A, B);
}

// Expects _mm_cmpgt_sd to call llvm.x86.sse2.cmp.sd with predicate i8 1,
// then rebuild the result with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpgt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpgt_sd(A, B);
}

// Expects _mm_cmple_pd to emit `fcmp ole <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_pd
  // CHECK:         [[CMP:%.*]] = fcmp ole <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmple_pd(A, B);
}

// Expects _mm_cmple_sd to call llvm.x86.sse2.cmp.sd with predicate i8 2.
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmple_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(A, B);
}

// Expects _mm_cmplt_epi8 to lower to `icmp sgt <16 x i8>`.
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi8
  // CHECK: icmp sgt <16 x i8>
  return _mm_cmplt_epi8(A, B);
}

// Expects _mm_cmplt_epi16 to lower to `icmp sgt <8 x i16>`.
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi16
  // CHECK: icmp sgt <8 x i16>
  return _mm_cmplt_epi16(A, B);
}

// Expects _mm_cmplt_epi32 to lower to `icmp sgt <4 x i32>`.
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmplt_epi32
  // CHECK: icmp sgt <4 x i32>
  return _mm_cmplt_epi32(A, B);
}

// Expects _mm_cmplt_pd to emit `fcmp olt <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_pd
  // CHECK:         [[CMP:%.*]] = fcmp olt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmplt_pd(A, B);
}

// Expects _mm_cmplt_sd to call llvm.x86.sse2.cmp.sd with predicate i8 1.
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmplt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(A, B);
}

// Expects _mm_cmpneq_pd to emit `fcmp une <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_pd
  // CHECK:         [[CMP:%.*]] = fcmp une <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpneq_pd(A, B);
}

// Expects _mm_cmpneq_sd to call llvm.x86.sse2.cmp.sd with predicate i8 4.
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpneq_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(A, B);
}

// Expects _mm_cmpnge_pd to emit `fcmp ugt <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnge_pd(A, B);
}

// Expects _mm_cmpnge_sd to call llvm.x86.sse2.cmp.sd with predicate i8 6,
// then rebuild the result with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnge_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpnge_sd(A, B);
}

// Expects _mm_cmpngt_pd to emit `fcmp uge <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpngt_pd(A, B);
}

// Expects _mm_cmpngt_sd to call llvm.x86.sse2.cmp.sd with predicate i8 5,
// then rebuild the result with extract/insert pairs for lanes 0 and 1.
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpngt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_cmpngt_sd(A, B);
}

// Expects _mm_cmpnle_pd to emit `fcmp ugt <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_pd
  // CHECK:         [[CMP:%.*]] = fcmp ugt <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnle_pd(A, B);
}

// Expects _mm_cmpnle_sd to call llvm.x86.sse2.cmp.sd with predicate i8 6.
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnle_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(A, B);
}

// Expects _mm_cmpnlt_pd to emit `fcmp uge <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_pd
  // CHECK:         [[CMP:%.*]] = fcmp uge <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpnlt_pd(A, B);
}

// Expects _mm_cmpnlt_sd to call llvm.x86.sse2.cmp.sd with predicate i8 5.
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpnlt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(A, B);
}

// Expects _mm_cmpord_pd to emit `fcmp ord <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_pd
  // CHECK:         [[CMP:%.*]] = fcmp ord <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpord_pd(A, B);
}

// Expects _mm_cmpord_sd to call llvm.x86.sse2.cmp.sd with predicate i8 7.
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(A, B);
}

// Expects _mm_cmpunord_pd to emit `fcmp uno <2 x double>`, then sext to
// <2 x i64>, bitcast to <2 x double> and ret on consecutive lines.
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_pd
  // CHECK:         [[CMP:%.*]] = fcmp uno <2 x double>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
  // CHECK-NEXT:    ret <2 x double> [[BC]]
  return _mm_cmpunord_pd(A, B);
}

// Expects _mm_cmpunord_sd to call llvm.x86.sse2.cmp.sd with predicate i8 3.
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_cmpunord_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(A, B);
}

// Expects _mm_comieq_sd to call the llvm.x86.sse2.comieq.sd intrinsic.
int test_mm_comieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comieq_sd
  // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comieq_sd(A, B);
}

// Expects _mm_comige_sd to call the llvm.x86.sse2.comige.sd intrinsic.
int test_mm_comige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comige_sd
  // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comige_sd(A, B);
}

// Expects _mm_comigt_sd to call the llvm.x86.sse2.comigt.sd intrinsic.
int test_mm_comigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comigt_sd
  // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comigt_sd(A, B);
}

// Expects _mm_comile_sd to call the llvm.x86.sse2.comile.sd intrinsic.
int test_mm_comile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comile_sd
  // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comile_sd(A, B);
}

// Expects _mm_comilt_sd to call the llvm.x86.sse2.comilt.sd intrinsic.
int test_mm_comilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comilt_sd
  // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comilt_sd(A, B);
}

// Expects _mm_comineq_sd to call the llvm.x86.sse2.comineq.sd intrinsic.
int test_mm_comineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_comineq_sd
  // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_comineq_sd(A, B);
}

// Expects _mm_cvtepi32_pd to shuffle out lanes 0 and 1 and convert them
// with `sitofp <2 x i32> ... to <2 x double>`.
__m128d test_mm_cvtepi32_pd(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_pd
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
  return _mm_cvtepi32_pd(A);
}

// Expects _mm_cvtepi32_ps to call the llvm.x86.sse2.cvtdq2ps intrinsic.
__m128 test_mm_cvtepi32_ps(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %{{.*}})
  return _mm_cvtepi32_ps(A);
}

// Expects _mm_cvtpd_epi32 to call the llvm.x86.sse2.cvtpd2dq intrinsic.
__m128i test_mm_cvtpd_epi32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
  return _mm_cvtpd_epi32(A);
}

// Expects _mm_cvtpd_ps to call the llvm.x86.sse2.cvtpd2ps intrinsic.
__m128 test_mm_cvtpd_ps(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_ps
  // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
  return _mm_cvtpd_ps(A);
}

// Expects _mm_cvtps_epi32 to call the llvm.x86.sse2.cvtps2dq intrinsic.
__m128i test_mm_cvtps_epi32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
  return _mm_cvtps_epi32(A);
}

// Expects _mm_cvtps_pd to shuffle out lanes 0 and 1 and widen them with
// `fpext <2 x float> ... to <2 x double>`.
__m128d test_mm_cvtps_pd(__m128 A) {
  // CHECK-LABEL: test_mm_cvtps_pd
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
  return _mm_cvtps_pd(A);
}

// Expects _mm_cvtsd_f64 to be an extractelement of lane 0.
double test_mm_cvtsd_f64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_f64
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  return _mm_cvtsd_f64(A);
}

// Expects _mm_cvtsd_si32 to call the llvm.x86.sse2.cvtsd2si intrinsic.
int test_mm_cvtsd_si32(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si32
  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
  return _mm_cvtsd_si32(A);
}

// Expects _mm_cvtsd_si64 to call the llvm.x86.sse2.cvtsd2si64 intrinsic.
long long test_mm_cvtsd_si64(__m128d A) {
  // CHECK-LABEL: test_mm_cvtsd_si64
  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
  return _mm_cvtsd_si64(A);
}

// Expects _mm_cvtsd_ss to contain an `fptrunc double ... to float`.
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
  // CHECK-LABEL: test_mm_cvtsd_ss
  // CHECK: fptrunc double %{{.*}} to float
  return _mm_cvtsd_ss(A, B);
}

// Expects _mm_cvtsi128_si32 to be an extractelement of lane 0 of <4 x i32>.
int test_mm_cvtsi128_si32(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
  return _mm_cvtsi128_si32(A);
}

// Expects _mm_cvtsi128_si64 to be an extractelement of lane 0 of <2 x i64>.
long long test_mm_cvtsi128_si64(__m128i A) {
  // CHECK-LABEL: test_mm_cvtsi128_si64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  return _mm_cvtsi128_si64(A);
}

// Expects _mm_cvtsi32_sd to convert B with `sitofp i32 ... to double` and
// insert the result into lane 0.
__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_sd
  // CHECK: sitofp i32 %{{.*}} to double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_cvtsi32_sd(A, B);
}

// Expects _mm_cvtsi32_si128 to build the vector with four insertelement
// ops: the argument in lane 0 and constant 0 in lanes 1-3.
__m128i test_mm_cvtsi32_si128(int A) {
  // CHECK-LABEL: test_mm_cvtsi32_si128
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
  return _mm_cvtsi32_si128(A);
}

// Codegen test for _mm_cvtsi64_sd: expects a sitofp from i64 to double followed
// by an insertelement of a double into lane 0 of a <2 x double>.
test_mm_cvtsi64_sd(__m128d A,long long B)542*67e74705SXin Li __m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
543*67e74705SXin Li   // CHECK-LABEL: test_mm_cvtsi64_sd
544*67e74705SXin Li   // CHECK: sitofp i64 %{{.*}} to double
545*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
546*67e74705SXin Li   return _mm_cvtsi64_sd(A, B);
547*67e74705SXin Li }
548*67e74705SXin Li 
// Codegen test for _mm_cvtsi64_si128: expects an i64 value inserted into lane 0
// of an undef <2 x i64>, then the constant 0 inserted into lane 1.
test_mm_cvtsi64_si128(long long A)549*67e74705SXin Li __m128i test_mm_cvtsi64_si128(long long A) {
550*67e74705SXin Li   // CHECK-LABEL: test_mm_cvtsi64_si128
551*67e74705SXin Li   // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
552*67e74705SXin Li   // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
553*67e74705SXin Li   return _mm_cvtsi64_si128(A);
554*67e74705SXin Li }
555*67e74705SXin Li 
// Codegen test for _mm_cvtss_sd: expects lane 0 to be extracted from a
// <4 x float>, widened with fpext to double, and inserted into lane 0 of a
// <2 x double>.
test_mm_cvtss_sd(__m128d A,__m128 B)556*67e74705SXin Li __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
557*67e74705SXin Li   // CHECK-LABEL: test_mm_cvtss_sd
558*67e74705SXin Li   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
559*67e74705SXin Li   // CHECK: fpext float %{{.*}} to double
560*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
561*67e74705SXin Li   return _mm_cvtss_sd(A, B);
562*67e74705SXin Li }
563*67e74705SXin Li 
// Codegen test for _mm_cvttpd_epi32: expects a call to the
// llvm.x86.sse2.cvttpd2dq intrinsic taking a <2 x double> and returning <4 x i32>.
test_mm_cvttpd_epi32(__m128d A)564*67e74705SXin Li __m128i test_mm_cvttpd_epi32(__m128d A) {
565*67e74705SXin Li   // CHECK-LABEL: test_mm_cvttpd_epi32
566*67e74705SXin Li   // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
567*67e74705SXin Li   return _mm_cvttpd_epi32(A);
568*67e74705SXin Li }
569*67e74705SXin Li 
// Codegen test for _mm_cvttps_epi32: expects an fptosi instruction converting
// <4 x float> to <4 x i32>.
test_mm_cvttps_epi32(__m128 A)570*67e74705SXin Li __m128i test_mm_cvttps_epi32(__m128 A) {
571*67e74705SXin Li   // CHECK-LABEL: test_mm_cvttps_epi32
572*67e74705SXin Li   // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
573*67e74705SXin Li   return _mm_cvttps_epi32(A);
574*67e74705SXin Li }
575*67e74705SXin Li 
// Codegen test for _mm_cvttsd_si32: expects lane 0 to be extracted from a
// <2 x double> and then an fptosi from double to i32.
test_mm_cvttsd_si32(__m128d A)576*67e74705SXin Li int test_mm_cvttsd_si32(__m128d A) {
577*67e74705SXin Li   // CHECK-LABEL: test_mm_cvttsd_si32
578*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
579*67e74705SXin Li   // CHECK: fptosi double %{{.*}} to i32
580*67e74705SXin Li   return _mm_cvttsd_si32(A);
581*67e74705SXin Li }
582*67e74705SXin Li 
// Codegen test for _mm_cvttsd_si64: expects lane 0 to be extracted from a
// <2 x double> and then an fptosi from double to i64.
test_mm_cvttsd_si64(__m128d A)583*67e74705SXin Li long long test_mm_cvttsd_si64(__m128d A) {
584*67e74705SXin Li   // CHECK-LABEL: test_mm_cvttsd_si64
585*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
586*67e74705SXin Li   // CHECK: fptosi double %{{.*}} to i64
587*67e74705SXin Li   return _mm_cvttsd_si64(A);
588*67e74705SXin Li }
589*67e74705SXin Li 
// Codegen test for _mm_div_pd: expects a vector fdiv on <2 x double>.
test_mm_div_pd(__m128d A,__m128d B)590*67e74705SXin Li __m128d test_mm_div_pd(__m128d A, __m128d B) {
591*67e74705SXin Li   // CHECK-LABEL: test_mm_div_pd
592*67e74705SXin Li   // CHECK: fdiv <2 x double>
593*67e74705SXin Li   return _mm_div_pd(A, B);
594*67e74705SXin Li }
595*67e74705SXin Li 
// Codegen test for _mm_div_sd: expects two lane-0 extracts from <2 x double>
// values, a scalar fdiv on double, and the result inserted back into lane 0.
test_mm_div_sd(__m128d A,__m128d B)596*67e74705SXin Li __m128d test_mm_div_sd(__m128d A, __m128d B) {
597*67e74705SXin Li   // CHECK-LABEL: test_mm_div_sd
598*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
599*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
600*67e74705SXin Li   // CHECK: fdiv double
601*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
602*67e74705SXin Li   return _mm_div_sd(A, B);
603*67e74705SXin Li }
604*67e74705SXin Li 
605*67e74705SXin Li // Lowering to pextrw requires optimization.
// Codegen test for _mm_extract_epi16: expects the lane index to be masked with
// `and ..., 7`, the masked index to be used by an extractelement on <8 x i16>,
// and the extracted i16 to be zero-extended to i32.
// NOTE(review): the index passed is 9, outside the 0..7 lane range; presumably
// chosen on purpose so that the masking is exercised -- confirm before changing.
test_mm_extract_epi16(__m128i A)606*67e74705SXin Li int test_mm_extract_epi16(__m128i A) {
607*67e74705SXin Li   // CHECK-LABEL: test_mm_extract_epi16
608*67e74705SXin Li   // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
609*67e74705SXin Li   // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
610*67e74705SXin Li   // CHECK: zext i16 %{{.*}} to i32
611*67e74705SXin Li   return _mm_extract_epi16(A, 9);
612*67e74705SXin Li }
613*67e74705SXin Li 
// Codegen test for _mm_insert_epi16: expects the lane index to be masked with
// `and ..., 7` and the masked index to be used by an insertelement on <8 x i16>.
// NOTE(review): the index passed is 8, outside the 0..7 lane range; presumably
// chosen on purpose so that the masking is exercised -- confirm before changing.
test_mm_insert_epi16(__m128i A,int B)614*67e74705SXin Li __m128i test_mm_insert_epi16(__m128i A, int B) {
615*67e74705SXin Li   // CHECK-LABEL: test_mm_insert_epi16
616*67e74705SXin Li   // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
617*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
618*67e74705SXin Li   return _mm_insert_epi16(A, B, 8);
619*67e74705SXin Li }
620*67e74705SXin Li 
// Codegen test for _mm_lfence: expects a call to the void intrinsic
// llvm.x86.sse2.lfence with no arguments.
test_mm_lfence()621*67e74705SXin Li void test_mm_lfence() {
622*67e74705SXin Li   // CHECK-LABEL: test_mm_lfence
623*67e74705SXin Li   // CHECK: call void @llvm.x86.sse2.lfence()
624*67e74705SXin Li   _mm_lfence();
625*67e74705SXin Li }
626*67e74705SXin Li 
// Codegen test for _mm_load_pd: expects a <2 x double> load with 16-byte
// alignment.
test_mm_load_pd(double const * A)627*67e74705SXin Li __m128d test_mm_load_pd(double const* A) {
628*67e74705SXin Li   // CHECK-LABEL: test_mm_load_pd
629*67e74705SXin Li   // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
630*67e74705SXin Li   return _mm_load_pd(A);
631*67e74705SXin Li }
632*67e74705SXin Li 
// Codegen test for _mm_load_pd1: expects a scalar double load with 8-byte
// alignment, then a double inserted into lane 0 of an undef <2 x double> and a
// double inserted into lane 1.
test_mm_load_pd1(double const * A)633*67e74705SXin Li __m128d test_mm_load_pd1(double const* A) {
634*67e74705SXin Li   // CHECK-LABEL: test_mm_load_pd1
635*67e74705SXin Li   // CHECK: load double, double* %{{.*}}, align 8
636*67e74705SXin Li   // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
637*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
638*67e74705SXin Li   return _mm_load_pd1(A);
639*67e74705SXin Li }
640*67e74705SXin Li 
// Codegen test for _mm_load_sd: expects a scalar double load whose alignment is
// exactly 1; the end-of-line anchor in the pattern keeps it from matching a
// larger alignment such as 16.
test_mm_load_sd(double const * A)641*67e74705SXin Li __m128d test_mm_load_sd(double const* A) {
642*67e74705SXin Li   // CHECK-LABEL: test_mm_load_sd
643*67e74705SXin Li   // CHECK: load double, double* %{{.*}}, align 1{{$}}
644*67e74705SXin Li   return _mm_load_sd(A);
645*67e74705SXin Li }
646*67e74705SXin Li 
// Codegen test for _mm_load_si128: expects a <2 x i64> load with 16-byte
// alignment.
test_mm_load_si128(__m128i const * A)647*67e74705SXin Li __m128i test_mm_load_si128(__m128i const* A) {
648*67e74705SXin Li   // CHECK-LABEL: test_mm_load_si128
649*67e74705SXin Li   // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
650*67e74705SXin Li   return _mm_load_si128(A);
651*67e74705SXin Li }
652*67e74705SXin Li 
// Codegen test for _mm_load1_pd: expects a scalar double load with 8-byte
// alignment, then a double inserted into lane 0 of an undef <2 x double> and a
// double inserted into lane 1 (the same IR shape checked for _mm_load_pd1).
test_mm_load1_pd(double const * A)653*67e74705SXin Li __m128d test_mm_load1_pd(double const* A) {
654*67e74705SXin Li   // CHECK-LABEL: test_mm_load1_pd
655*67e74705SXin Li   // CHECK: load double, double* %{{.*}}, align 8
656*67e74705SXin Li   // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
657*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
658*67e74705SXin Li   return _mm_load1_pd(A);
659*67e74705SXin Li }
660*67e74705SXin Li 
// Codegen test for _mm_loadh_pd: expects a scalar double load with alignment
// exactly 1, followed by an insertelement into lane 1 of a <2 x double>.
test_mm_loadh_pd(__m128d x,void * y)661*67e74705SXin Li __m128d test_mm_loadh_pd(__m128d x, void* y) {
662*67e74705SXin Li   // CHECK-LABEL: test_mm_loadh_pd
663*67e74705SXin Li   // CHECK: load double, double* %{{.*}}, align 1{{$}}
664*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
665*67e74705SXin Li   return _mm_loadh_pd(x, y);
666*67e74705SXin Li }
667*67e74705SXin Li 
// Codegen test for _mm_loadl_epi64: expects an i64 load with alignment exactly
// 1 (the end-of-line anchor rules out a larger alignment), the loaded value
// inserted into lane 0 of an undef <2 x i64>, and the constant 0 inserted into
// lane 1.
// The function name is matched with a label directive, as in the neighbouring
// tests, so FileCheck isolates this function's checks in their own block and
// a mismatch in an adjacent test cannot match across the function boundary.
test_mm_loadl_epi64(__m128i * y)668*67e74705SXin Li __m128i test_mm_loadl_epi64(__m128i* y) {
669*67e74705SXin Li   // CHECK-LABEL: test_mm_loadl_epi64
670*67e74705SXin Li   // CHECK: load i64, i64* {{.*}}, align 1{{$}}
671*67e74705SXin Li   // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
672*67e74705SXin Li   // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
673*67e74705SXin Li   return _mm_loadl_epi64(y);
674*67e74705SXin Li }
675*67e74705SXin Li 
// Codegen test for _mm_loadl_pd: expects a scalar double load with alignment
// exactly 1, that value inserted into lane 0 of an undef <2 x double>, lane 1
// extracted from a <2 x double>, and a double inserted into lane 1 of the result.
test_mm_loadl_pd(__m128d x,void * y)676*67e74705SXin Li __m128d test_mm_loadl_pd(__m128d x, void* y) {
677*67e74705SXin Li   // CHECK-LABEL: test_mm_loadl_pd
678*67e74705SXin Li   // CHECK: load double, double* %{{.*}}, align 1{{$}}
679*67e74705SXin Li   // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
680*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 1
681*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
682*67e74705SXin Li   return _mm_loadl_pd(x, y);
683*67e74705SXin Li }
684*67e74705SXin Li 
// Codegen test for _mm_loadr_pd: expects a 16-byte-aligned <2 x double> load
// followed by a shufflevector with mask <1, 0>, i.e. the two lanes swapped.
test_mm_loadr_pd(double const * A)685*67e74705SXin Li __m128d test_mm_loadr_pd(double const* A) {
686*67e74705SXin Li   // CHECK-LABEL: test_mm_loadr_pd
687*67e74705SXin Li   // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
688*67e74705SXin Li   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
689*67e74705SXin Li   return _mm_loadr_pd(A);
690*67e74705SXin Li }
691*67e74705SXin Li 
// Codegen test for _mm_loadu_pd: expects a <2 x double> load whose alignment is
// exactly 1 (the end-of-line anchor rules out a larger alignment).
test_mm_loadu_pd(double const * A)692*67e74705SXin Li __m128d test_mm_loadu_pd(double const* A) {
693*67e74705SXin Li   // CHECK-LABEL: test_mm_loadu_pd
694*67e74705SXin Li   // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
695*67e74705SXin Li   return _mm_loadu_pd(A);
696*67e74705SXin Li }
697*67e74705SXin Li 
// Codegen test for _mm_loadu_si128: expects a <2 x i64> load whose alignment is
// exactly 1 (the end-of-line anchor rules out a larger alignment).
test_mm_loadu_si128(__m128i const * A)698*67e74705SXin Li __m128i test_mm_loadu_si128(__m128i const* A) {
699*67e74705SXin Li   // CHECK-LABEL: test_mm_loadu_si128
700*67e74705SXin Li   // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
701*67e74705SXin Li   return _mm_loadu_si128(A);
702*67e74705SXin Li }
703*67e74705SXin Li 
// Codegen test for _mm_loadu_si64: expects an i64 load with alignment exactly
// 1, the loaded value inserted into lane 0 of an undef <2 x i64>, and the
// constant 0 inserted into lane 1.
test_mm_loadu_si64(void const * A)704*67e74705SXin Li __m128i test_mm_loadu_si64(void const* A) {
705*67e74705SXin Li   // CHECK-LABEL: test_mm_loadu_si64
706*67e74705SXin Li   // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
707*67e74705SXin Li   // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
708*67e74705SXin Li   // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
709*67e74705SXin Li   return _mm_loadu_si64(A);
710*67e74705SXin Li }
711*67e74705SXin Li 
// Codegen test for _mm_madd_epi16: expects a call to llvm.x86.sse2.pmadd.wd
// taking two <8 x i16> operands and returning <4 x i32>.
test_mm_madd_epi16(__m128i A,__m128i B)712*67e74705SXin Li __m128i test_mm_madd_epi16(__m128i A, __m128i B) {
713*67e74705SXin Li   // CHECK-LABEL: test_mm_madd_epi16
714*67e74705SXin Li   // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
715*67e74705SXin Li   return _mm_madd_epi16(A, B);
716*67e74705SXin Li }
717*67e74705SXin Li 
// Codegen test for _mm_maskmoveu_si128: expects a call to the void intrinsic
// llvm.x86.sse2.maskmov.dqu taking two <16 x i8> operands and an i8* pointer.
test_mm_maskmoveu_si128(__m128i A,__m128i B,char * C)718*67e74705SXin Li void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
719*67e74705SXin Li   // CHECK-LABEL: test_mm_maskmoveu_si128
720*67e74705SXin Li   // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
721*67e74705SXin Li   _mm_maskmoveu_si128(A, B, C);
722*67e74705SXin Li }
723*67e74705SXin Li 
// Codegen test for _mm_max_epi16: expects a signed greater-than compare
// (icmp sgt) on <8 x i16> operands X and Y, with the very next IR line being a
// select that uses the compare result to choose between the same X and Y.
test_mm_max_epi16(__m128i A,__m128i B)724*67e74705SXin Li __m128i test_mm_max_epi16(__m128i A, __m128i B) {
725*67e74705SXin Li   // CHECK-LABEL: test_mm_max_epi16
726*67e74705SXin Li   // CHECK:       [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]]
727*67e74705SXin Li   // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
728*67e74705SXin Li   return _mm_max_epi16(A, B);
729*67e74705SXin Li }
730*67e74705SXin Li 
// Codegen test for _mm_max_epu8: expects an unsigned greater-than compare
// (icmp ugt) on <16 x i8> operands X and Y, with the very next IR line being a
// select that uses the compare result to choose between the same X and Y.
test_mm_max_epu8(__m128i A,__m128i B)731*67e74705SXin Li __m128i test_mm_max_epu8(__m128i A, __m128i B) {
732*67e74705SXin Li   // CHECK-LABEL: test_mm_max_epu8
733*67e74705SXin Li   // CHECK:       [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]]
734*67e74705SXin Li   // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
735*67e74705SXin Li   return _mm_max_epu8(A, B);
736*67e74705SXin Li }
737*67e74705SXin Li 
// Codegen test for _mm_max_pd: expects a call to llvm.x86.sse2.max.pd taking
// two <2 x double> operands and returning <2 x double>.
test_mm_max_pd(__m128d A,__m128d B)738*67e74705SXin Li __m128d test_mm_max_pd(__m128d A, __m128d B) {
739*67e74705SXin Li   // CHECK-LABEL: test_mm_max_pd
740*67e74705SXin Li   // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
741*67e74705SXin Li   return _mm_max_pd(A, B);
742*67e74705SXin Li }
743*67e74705SXin Li 
// Codegen test for _mm_max_sd: expects a call to llvm.x86.sse2.max.sd taking
// two <2 x double> operands and returning <2 x double>.
test_mm_max_sd(__m128d A,__m128d B)744*67e74705SXin Li __m128d test_mm_max_sd(__m128d A, __m128d B) {
745*67e74705SXin Li   // CHECK-LABEL: test_mm_max_sd
746*67e74705SXin Li   // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
747*67e74705SXin Li   return _mm_max_sd(A, B);
748*67e74705SXin Li }
749*67e74705SXin Li 
// Codegen test for _mm_mfence: expects a call to the void intrinsic
// llvm.x86.sse2.mfence with no arguments.
test_mm_mfence()750*67e74705SXin Li void test_mm_mfence() {
751*67e74705SXin Li   // CHECK-LABEL: test_mm_mfence
752*67e74705SXin Li   // CHECK: call void @llvm.x86.sse2.mfence()
753*67e74705SXin Li   _mm_mfence();
754*67e74705SXin Li }
755*67e74705SXin Li 
// Codegen test for _mm_min_epi16: expects a signed less-than compare
// (icmp slt) on <8 x i16> operands X and Y, with the very next IR line being a
// select that uses the compare result to choose between the same X and Y.
test_mm_min_epi16(__m128i A,__m128i B)756*67e74705SXin Li __m128i test_mm_min_epi16(__m128i A, __m128i B) {
757*67e74705SXin Li   // CHECK-LABEL: test_mm_min_epi16
758*67e74705SXin Li   // CHECK:       [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]]
759*67e74705SXin Li   // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
760*67e74705SXin Li   return _mm_min_epi16(A, B);
761*67e74705SXin Li }
762*67e74705SXin Li 
// Codegen test for _mm_min_epu8: expects an unsigned less-than compare
// (icmp ult) on <16 x i8> operands X and Y, with the very next IR line being a
// select that uses the compare result to choose between the same X and Y.
test_mm_min_epu8(__m128i A,__m128i B)763*67e74705SXin Li __m128i test_mm_min_epu8(__m128i A, __m128i B) {
764*67e74705SXin Li   // CHECK-LABEL: test_mm_min_epu8
765*67e74705SXin Li   // CHECK:       [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]]
766*67e74705SXin Li   // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
767*67e74705SXin Li   return _mm_min_epu8(A, B);
768*67e74705SXin Li }
769*67e74705SXin Li 
// Codegen test for _mm_min_pd: expects a call to llvm.x86.sse2.min.pd taking
// two <2 x double> operands and returning <2 x double>.
test_mm_min_pd(__m128d A,__m128d B)770*67e74705SXin Li __m128d test_mm_min_pd(__m128d A, __m128d B) {
771*67e74705SXin Li   // CHECK-LABEL: test_mm_min_pd
772*67e74705SXin Li   // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
773*67e74705SXin Li   return _mm_min_pd(A, B);
774*67e74705SXin Li }
775*67e74705SXin Li 
// Codegen test for _mm_min_sd: expects a call to llvm.x86.sse2.min.sd taking
// two <2 x double> operands and returning <2 x double>.
test_mm_min_sd(__m128d A,__m128d B)776*67e74705SXin Li __m128d test_mm_min_sd(__m128d A, __m128d B) {
777*67e74705SXin Li   // CHECK-LABEL: test_mm_min_sd
778*67e74705SXin Li   // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
779*67e74705SXin Li   return _mm_min_sd(A, B);
780*67e74705SXin Li }
781*67e74705SXin Li 
// Codegen test for _mm_move_epi64: expects a shufflevector over two <2 x i64>
// operands with mask <0, 2>, i.e. lane 0 taken from each operand.
test_mm_move_epi64(__m128i A)782*67e74705SXin Li __m128i test_mm_move_epi64(__m128i A) {
783*67e74705SXin Li   // CHECK-LABEL: test_mm_move_epi64
784*67e74705SXin Li   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
785*67e74705SXin Li   return _mm_move_epi64(A);
786*67e74705SXin Li }
787*67e74705SXin Li 
// Codegen test for _mm_move_sd: expects lane 0 extracted from a <2 x double>
// and inserted into lane 0 of an undef vector, then lane 1 extracted from a
// <2 x double> and inserted into lane 1 of the result.
test_mm_move_sd(__m128d A,__m128d B)788*67e74705SXin Li __m128d test_mm_move_sd(__m128d A, __m128d B) {
789*67e74705SXin Li   // CHECK-LABEL: test_mm_move_sd
790*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
791*67e74705SXin Li   // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
792*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 1
793*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
794*67e74705SXin Li   return _mm_move_sd(A, B);
795*67e74705SXin Li }
796*67e74705SXin Li 
// Codegen test for _mm_movemask_epi8: expects a call to
// llvm.x86.sse2.pmovmskb.128 taking a <16 x i8> and returning i32.
test_mm_movemask_epi8(__m128i A)797*67e74705SXin Li int test_mm_movemask_epi8(__m128i A) {
798*67e74705SXin Li   // CHECK-LABEL: test_mm_movemask_epi8
799*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
800*67e74705SXin Li   return _mm_movemask_epi8(A);
801*67e74705SXin Li }
802*67e74705SXin Li 
// Codegen test for _mm_movemask_pd: expects a call to llvm.x86.sse2.movmsk.pd
// taking a <2 x double> and returning i32.
test_mm_movemask_pd(__m128d A)803*67e74705SXin Li int test_mm_movemask_pd(__m128d A) {
804*67e74705SXin Li   // CHECK-LABEL: test_mm_movemask_pd
805*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
806*67e74705SXin Li   return _mm_movemask_pd(A);
807*67e74705SXin Li }
808*67e74705SXin Li 
// Codegen test for _mm_mul_epu32: expects a call to llvm.x86.sse2.pmulu.dq
// taking two <4 x i32> operands and returning <2 x i64>.
test_mm_mul_epu32(__m128i A,__m128i B)809*67e74705SXin Li __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
810*67e74705SXin Li   // CHECK-LABEL: test_mm_mul_epu32
811*67e74705SXin Li   // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
812*67e74705SXin Li   return _mm_mul_epu32(A, B);
813*67e74705SXin Li }
814*67e74705SXin Li 
// Codegen test for _mm_mul_pd: expects a vector fmul on <2 x double>.
test_mm_mul_pd(__m128d A,__m128d B)815*67e74705SXin Li __m128d test_mm_mul_pd(__m128d A, __m128d B) {
816*67e74705SXin Li   // CHECK-LABEL: test_mm_mul_pd
817*67e74705SXin Li   // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
818*67e74705SXin Li   return _mm_mul_pd(A, B);
819*67e74705SXin Li }
820*67e74705SXin Li 
// Codegen test for _mm_mul_sd: expects two lane-0 extracts from <2 x double>
// values, a scalar fmul on double, and the result inserted back into lane 0.
test_mm_mul_sd(__m128d A,__m128d B)821*67e74705SXin Li __m128d test_mm_mul_sd(__m128d A, __m128d B) {
822*67e74705SXin Li   // CHECK-LABEL: test_mm_mul_sd
823*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
824*67e74705SXin Li   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
825*67e74705SXin Li   // CHECK: fmul double
826*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
827*67e74705SXin Li   return _mm_mul_sd(A, B);
828*67e74705SXin Li }
829*67e74705SXin Li 
// Codegen test for _mm_mulhi_epi16: expects a call to llvm.x86.sse2.pmulh.w
// taking two <8 x i16> operands and returning <8 x i16>.
test_mm_mulhi_epi16(__m128i A,__m128i B)830*67e74705SXin Li __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
831*67e74705SXin Li   // CHECK-LABEL: test_mm_mulhi_epi16
832*67e74705SXin Li   // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
833*67e74705SXin Li   return _mm_mulhi_epi16(A, B);
834*67e74705SXin Li }
835*67e74705SXin Li 
// Codegen test for _mm_mulhi_epu16: expects a call to llvm.x86.sse2.pmulhu.w
// taking two <8 x i16> operands and returning <8 x i16>.
test_mm_mulhi_epu16(__m128i A,__m128i B)836*67e74705SXin Li __m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
837*67e74705SXin Li   // CHECK-LABEL: test_mm_mulhi_epu16
838*67e74705SXin Li   // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
839*67e74705SXin Li   return _mm_mulhi_epu16(A, B);
840*67e74705SXin Li }
841*67e74705SXin Li 
// Codegen test for _mm_mullo_epi16: expects a vector mul on <8 x i16>.
test_mm_mullo_epi16(__m128i A,__m128i B)842*67e74705SXin Li __m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
843*67e74705SXin Li   // CHECK-LABEL: test_mm_mullo_epi16
844*67e74705SXin Li   // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
845*67e74705SXin Li   return _mm_mullo_epi16(A, B);
846*67e74705SXin Li }
847*67e74705SXin Li 
// Codegen test for _mm_or_pd: expects a bitwise `or` in the IR. Note that the
// expected operand type is the integer vector <4 x i32>, not <2 x double>.
test_mm_or_pd(__m128d A,__m128d B)848*67e74705SXin Li __m128d test_mm_or_pd(__m128d A, __m128d B) {
849*67e74705SXin Li   // CHECK-LABEL: test_mm_or_pd
850*67e74705SXin Li   // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
851*67e74705SXin Li   return _mm_or_pd(A, B);
852*67e74705SXin Li }
853*67e74705SXin Li 
// Codegen test for _mm_or_si128: expects a bitwise `or` on <2 x i64>.
test_mm_or_si128(__m128i A,__m128i B)854*67e74705SXin Li __m128i test_mm_or_si128(__m128i A, __m128i B) {
855*67e74705SXin Li   // CHECK-LABEL: test_mm_or_si128
856*67e74705SXin Li   // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
857*67e74705SXin Li   return _mm_or_si128(A, B);
858*67e74705SXin Li }
859*67e74705SXin Li 
// Codegen test for _mm_packs_epi16: expects a call to
// llvm.x86.sse2.packsswb.128 taking two <8 x i16> operands and returning
// <16 x i8>.
test_mm_packs_epi16(__m128i A,__m128i B)860*67e74705SXin Li __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
861*67e74705SXin Li   // CHECK-LABEL: test_mm_packs_epi16
862*67e74705SXin Li   // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
863*67e74705SXin Li   return _mm_packs_epi16(A, B);
864*67e74705SXin Li }
865*67e74705SXin Li 
// Codegen test for _mm_packs_epi32: expects a call to
// llvm.x86.sse2.packssdw.128 taking two <4 x i32> operands and returning
// <8 x i16>.
test_mm_packs_epi32(__m128i A,__m128i B)866*67e74705SXin Li __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
867*67e74705SXin Li   // CHECK-LABEL: test_mm_packs_epi32
868*67e74705SXin Li   // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
869*67e74705SXin Li   return _mm_packs_epi32(A, B);
870*67e74705SXin Li }
871*67e74705SXin Li 
// Codegen test for _mm_packus_epi16: expects a call to
// llvm.x86.sse2.packuswb.128 taking two <8 x i16> operands and returning
// <16 x i8>.
test_mm_packus_epi16(__m128i A,__m128i B)872*67e74705SXin Li __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
873*67e74705SXin Li   // CHECK-LABEL: test_mm_packus_epi16
874*67e74705SXin Li   // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
875*67e74705SXin Li   return _mm_packus_epi16(A, B);
876*67e74705SXin Li }
877*67e74705SXin Li 
// Codegen test for _mm_pause: expects a call to the void intrinsic
// llvm.x86.sse2.pause with no arguments.
// The intrinsic is invoked as a plain statement, matching test_mm_lfence and
// test_mm_mfence; a `return` with an expression is not permitted by ISO C in a
// function whose return type is void.
test_mm_pause()878*67e74705SXin Li void test_mm_pause() {
879*67e74705SXin Li   // CHECK-LABEL: test_mm_pause
880*67e74705SXin Li   // CHECK: call void @llvm.x86.sse2.pause()
881*67e74705SXin Li   _mm_pause();
882*67e74705SXin Li }
883*67e74705SXin Li 
// Codegen test for _mm_sad_epu8: expects a call to llvm.x86.sse2.psad.bw taking
// two <16 x i8> operands and returning <2 x i64>.
test_mm_sad_epu8(__m128i A,__m128i B)884*67e74705SXin Li __m128i test_mm_sad_epu8(__m128i A, __m128i B) {
885*67e74705SXin Li   // CHECK-LABEL: test_mm_sad_epu8
886*67e74705SXin Li   // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
887*67e74705SXin Li   return _mm_sad_epu8(A, B);
888*67e74705SXin Li }
889*67e74705SXin Li 
// Codegen test for _mm_set_epi8: expects sixteen insertelement instructions
// that fill lanes 0 through 15 of a <16 x i8>, starting from undef. The
// directives only pin the lane indices; they do not verify which argument
// ends up in which lane.
test_mm_set_epi8(char A,char B,char C,char D,char E,char F,char G,char H,char I,char J,char K,char L,char M,char N,char O,char P)890*67e74705SXin Li __m128i test_mm_set_epi8(char A, char B, char C, char D,
891*67e74705SXin Li                          char E, char F, char G, char H,
892*67e74705SXin Li                          char I, char J, char K, char L,
893*67e74705SXin Li                          char M, char N, char O, char P) {
894*67e74705SXin Li   // CHECK-LABEL: test_mm_set_epi8
895*67e74705SXin Li   // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
896*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
897*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
898*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
899*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
900*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
901*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
902*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
903*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
904*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
905*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
906*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
907*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
908*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
909*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
910*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
911*67e74705SXin Li   return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
912*67e74705SXin Li }
913*67e74705SXin Li 
// Codegen test for _mm_set_epi16: expects eight insertelement instructions
// that fill lanes 0 through 7 of an <8 x i16>, starting from undef. The
// directives only pin the lane indices, not which argument lands in which lane.
test_mm_set_epi16(short A,short B,short C,short D,short E,short F,short G,short H)914*67e74705SXin Li __m128i test_mm_set_epi16(short A, short B, short C, short D,
915*67e74705SXin Li                           short E, short F, short G, short H) {
916*67e74705SXin Li   // CHECK-LABEL: test_mm_set_epi16
917*67e74705SXin Li   // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
918*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
919*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
920*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
921*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
922*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
923*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
924*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
925*67e74705SXin Li   return _mm_set_epi16(A, B, C, D, E, F, G, H);
926*67e74705SXin Li }
927*67e74705SXin Li 
// Codegen test for _mm_set_epi32: expects four insertelement instructions that
// fill lanes 0 through 3 of a <4 x i32>, starting from undef. The directives
// only pin the lane indices, not which argument lands in which lane.
test_mm_set_epi32(int A,int B,int C,int D)928*67e74705SXin Li __m128i test_mm_set_epi32(int A, int B, int C, int D) {
929*67e74705SXin Li   // CHECK-LABEL: test_mm_set_epi32
930*67e74705SXin Li   // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
931*67e74705SXin Li   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
932*67e74705SXin Li   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
933*67e74705SXin Li   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
934*67e74705SXin Li   return _mm_set_epi32(A, B, C, D);
935*67e74705SXin Li }
936*67e74705SXin Li 
// Codegen test for _mm_set_epi64 (takes two __m64 values): expects i64 values
// inserted into lane 0 of an undef <2 x i64> and then into lane 1.
test_mm_set_epi64(__m64 A,__m64 B)937*67e74705SXin Li __m128i test_mm_set_epi64(__m64 A, __m64 B) {
938*67e74705SXin Li   // CHECK-LABEL: test_mm_set_epi64
939*67e74705SXin Li   // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
940*67e74705SXin Li   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
941*67e74705SXin Li   return _mm_set_epi64(A, B);
942*67e74705SXin Li }
943*67e74705SXin Li 
// Codegen test for _mm_set_epi64x (takes two long long values): expects i64
// values inserted into lane 0 of an undef <2 x i64> and then into lane 1.
test_mm_set_epi64x(long long A,long long B)944*67e74705SXin Li __m128i test_mm_set_epi64x(long long A, long long B) {
945*67e74705SXin Li   // CHECK-LABEL: test_mm_set_epi64x
946*67e74705SXin Li   // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
947*67e74705SXin Li   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
948*67e74705SXin Li   return _mm_set_epi64x(A, B);
949*67e74705SXin Li }
950*67e74705SXin Li 
// Codegen test for _mm_set_pd: expects double values inserted into lane 0 of
// an undef <2 x double> and then into lane 1.
test_mm_set_pd(double A,double B)951*67e74705SXin Li __m128d test_mm_set_pd(double A, double B) {
952*67e74705SXin Li   // CHECK-LABEL: test_mm_set_pd
953*67e74705SXin Li   // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
954*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
955*67e74705SXin Li   return _mm_set_pd(A, B);
956*67e74705SXin Li }
957*67e74705SXin Li 
// Codegen test for _mm_set_sd: expects a double inserted into lane 0 of an
// undef <2 x double>, then the constant 0.0 inserted into lane 1.
test_mm_set_sd(double A)958*67e74705SXin Li __m128d test_mm_set_sd(double A) {
959*67e74705SXin Li   // CHECK-LABEL: test_mm_set_sd
960*67e74705SXin Li   // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
961*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
962*67e74705SXin Li   return _mm_set_sd(A);
963*67e74705SXin Li }
964*67e74705SXin Li 
// Codegen test for _mm_set1_epi8: expects sixteen insertelement instructions
// that fill lanes 0 through 15 of a <16 x i8>, starting from undef.
test_mm_set1_epi8(char A)965*67e74705SXin Li __m128i test_mm_set1_epi8(char A) {
966*67e74705SXin Li   // CHECK-LABEL: test_mm_set1_epi8
967*67e74705SXin Li   // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
968*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
969*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
970*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
971*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
972*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
973*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
974*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
975*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
976*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
977*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
978*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
979*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
980*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
981*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
982*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
983*67e74705SXin Li   return _mm_set1_epi8(A);
984*67e74705SXin Li }
985*67e74705SXin Li 
// Codegen test for _mm_set1_epi16: expects eight insertelement instructions
// that fill lanes 0 through 7 of an <8 x i16>, starting from undef.
test_mm_set1_epi16(short A)986*67e74705SXin Li __m128i test_mm_set1_epi16(short A) {
987*67e74705SXin Li   // CHECK-LABEL: test_mm_set1_epi16
988*67e74705SXin Li   // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
989*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
990*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
991*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
992*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
993*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
994*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
995*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
996*67e74705SXin Li   return _mm_set1_epi16(A);
997*67e74705SXin Li }
998*67e74705SXin Li 
// Codegen test for _mm_set1_epi32: expects four insertelement instructions
// that fill lanes 0 through 3 of a <4 x i32>, starting from undef.
test_mm_set1_epi32(int A)999*67e74705SXin Li __m128i test_mm_set1_epi32(int A) {
1000*67e74705SXin Li   // CHECK-LABEL: test_mm_set1_epi32
1001*67e74705SXin Li   // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
1002*67e74705SXin Li   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1003*67e74705SXin Li   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1004*67e74705SXin Li   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1005*67e74705SXin Li   return _mm_set1_epi32(A);
1006*67e74705SXin Li }
1007*67e74705SXin Li 
// Codegen test for _mm_set1_epi64 (takes an __m64 value): expects i64 values
// inserted into lane 0 of an undef <2 x i64> and then into lane 1.
test_mm_set1_epi64(__m64 A)1008*67e74705SXin Li __m128i test_mm_set1_epi64(__m64 A) {
1009*67e74705SXin Li   // CHECK-LABEL: test_mm_set1_epi64
1010*67e74705SXin Li   // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1011*67e74705SXin Li   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1012*67e74705SXin Li   return _mm_set1_epi64(A);
1013*67e74705SXin Li }
1014*67e74705SXin Li 
// Codegen test for _mm_set1_epi64x (takes a long long value): expects i64
// values inserted into lane 0 of an undef <2 x i64> and then into lane 1.
test_mm_set1_epi64x(long long A)1015*67e74705SXin Li __m128i test_mm_set1_epi64x(long long A) {
1016*67e74705SXin Li   // CHECK-LABEL: test_mm_set1_epi64x
1017*67e74705SXin Li   // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1018*67e74705SXin Li   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1019*67e74705SXin Li   return _mm_set1_epi64x(A);
1020*67e74705SXin Li }
1021*67e74705SXin Li 
// Codegen test for _mm_set1_pd: expects double values inserted into lane 0 of
// an undef <2 x double> and then into lane 1.
test_mm_set1_pd(double A)1022*67e74705SXin Li __m128d test_mm_set1_pd(double A) {
1023*67e74705SXin Li   // CHECK-LABEL: test_mm_set1_pd
1024*67e74705SXin Li   // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1025*67e74705SXin Li   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1026*67e74705SXin Li   return _mm_set1_pd(A);
1027*67e74705SXin Li }
1028*67e74705SXin Li 
// Codegen test for _mm_setr_epi8: expects sixteen insertelement instructions
// that fill lanes 0 through 15 of a <16 x i8>, starting from undef. The
// directives only pin the lane indices; they do not verify which argument
// ends up in which lane.
test_mm_setr_epi8(char A,char B,char C,char D,char E,char F,char G,char H,char I,char J,char K,char L,char M,char N,char O,char P)1029*67e74705SXin Li __m128i test_mm_setr_epi8(char A, char B, char C, char D,
1030*67e74705SXin Li                           char E, char F, char G, char H,
1031*67e74705SXin Li                           char I, char J, char K, char L,
1032*67e74705SXin Li                           char M, char N, char O, char P) {
1033*67e74705SXin Li   // CHECK-LABEL: test_mm_setr_epi8
1034*67e74705SXin Li   // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
1035*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
1036*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1037*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1038*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1039*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1040*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1041*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1042*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1043*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1044*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1045*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1046*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1047*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1048*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1049*67e74705SXin Li   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1050*67e74705SXin Li   return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
1051*67e74705SXin Li }
1052*67e74705SXin Li 
// Codegen test for _mm_setr_epi16: expects eight insertelement instructions
// that fill lanes 0 through 7 of an <8 x i16>, starting from undef. The
// directives only pin the lane indices, not which argument lands in which lane.
test_mm_setr_epi16(short A,short B,short C,short D,short E,short F,short G,short H)1053*67e74705SXin Li __m128i test_mm_setr_epi16(short A, short B, short C, short D,
1054*67e74705SXin Li                            short E, short F, short G, short H) {
1055*67e74705SXin Li   // CHECK-LABEL: test_mm_setr_epi16
1056*67e74705SXin Li   // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
1057*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1058*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1059*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1060*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1061*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1062*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1063*67e74705SXin Li   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1064*67e74705SXin Li   return _mm_setr_epi16(A, B, C, D, E, F, G, H);
1065*67e74705SXin Li }
1066*67e74705SXin Li 
// Codegen test for _mm_setr_epi32: the four int arguments are forwarded in
// declaration order. The FileCheck patterns expect one insertelement per
// lane, at indices 0 through 3, building up from an undef <4 x i32>.
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
  // CHECK-LABEL: test_mm_setr_epi32
  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
  return _mm_setr_epi32(A, B, C, D);
}
1075*67e74705SXin Li 
// Codegen test for _mm_setr_epi64 with two __m64 operands. The FileCheck
// patterns expect two i64 insertelement instructions, index 0 first and
// index 1 second, building up from an undef <2 x i64>.
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
  // CHECK-LABEL: test_mm_setr_epi64
  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
  return _mm_setr_epi64(A, B);
}
1082*67e74705SXin Li 
// Codegen test for _mm_setr_pd: both double arguments are forwarded in
// declaration order. The FileCheck patterns expect insertelement at index 0
// and then index 1, building up from an undef <2 x double>.
__m128d test_mm_setr_pd(double A, double B) {
  // CHECK-LABEL: test_mm_setr_pd
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_setr_pd(A, B);
}
1089*67e74705SXin Li 
// Codegen test for _mm_setzero_pd. The FileCheck pattern expects a store of
// a zeroinitializer <2 x double>. The parameter list is spelled (void) so
// the definition also serves as a prototype.
__m128d test_mm_setzero_pd(void) {
  // CHECK-LABEL: test_mm_setzero_pd
  // CHECK: store <2 x double> zeroinitializer
  return _mm_setzero_pd();
}
1095*67e74705SXin Li 
// Codegen test for _mm_setzero_si128. The FileCheck pattern expects a store
// of a zeroinitializer <2 x i64>. The parameter list is spelled (void) so
// the definition also serves as a prototype.
__m128i test_mm_setzero_si128(void) {
  // CHECK-LABEL: test_mm_setzero_si128
  // CHECK: store <2 x i64> zeroinitializer
  return _mm_setzero_si128();
}
1101*67e74705SXin Li 
// Codegen test for _mm_shuffle_epi32 with immediate 0. The FileCheck pattern
// expects a <4 x i32> shufflevector whose mask is zeroinitializer.
__m128i test_mm_shuffle_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_shuffle_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
  return _mm_shuffle_epi32(A, 0);
}
1107*67e74705SXin Li 
// Codegen test for _mm_shuffle_pd with immediate 1. The FileCheck pattern
// expects a <2 x double> shufflevector with mask <1, 2>.
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_shuffle_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
  return _mm_shuffle_pd(A, B, 1);
}
1113*67e74705SXin Li 
// Codegen test for _mm_shufflehi_epi16 with immediate 0. The FileCheck
// pattern expects an <8 x i16> shufflevector with mask
// <0, 1, 2, 3, 4, 4, 4, 4>.
__m128i test_mm_shufflehi_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflehi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  return _mm_shufflehi_epi16(A, 0);
}
1119*67e74705SXin Li 
// Codegen test for _mm_shufflelo_epi16 with immediate 0. The FileCheck
// pattern expects an <8 x i16> shufflevector with mask
// <0, 0, 0, 0, 4, 5, 6, 7>.
__m128i test_mm_shufflelo_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_shufflelo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  return _mm_shufflelo_epi16(A, 0);
}
1125*67e74705SXin Li 
// Codegen test for _mm_sll_epi16(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psll.w taking two <8 x i16> operands.
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sll_epi16(A, B);
}
1131*67e74705SXin Li 
// Codegen test for _mm_sll_epi32(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psll.d taking two <4 x i32> operands.
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sll_epi32(A, B);
}
1137*67e74705SXin Li 
// Codegen test for _mm_sll_epi64(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psll.q taking two <2 x i64> operands.
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sll_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sll_epi64(A, B);
}
1143*67e74705SXin Li 
// Codegen test for _mm_slli_epi16 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.pslli.w with an <8 x i16> vector
// and an i32 count.
__m128i test_mm_slli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi16(A, 1);
}
1149*67e74705SXin Li 
// Codegen test for _mm_slli_epi32 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.pslli.d with a <4 x i32> vector
// and an i32 count.
__m128i test_mm_slli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi32(A, 1);
}
1155*67e74705SXin Li 
// Codegen test for _mm_slli_epi64 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.pslli.q with a <2 x i64> vector
// and an i32 count.
__m128i test_mm_slli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_slli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_slli_epi64(A, 1);
}
1161*67e74705SXin Li 
// Codegen test for _mm_slli_si128 with a byte count of 5. The FileCheck
// pattern expects a <16 x i8> shufflevector whose mask runs from 11 to 26.
__m128i test_mm_slli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return _mm_slli_si128(A, 5);
}
1167*67e74705SXin Li 
// Codegen test for _mm_slli_si128 with a byte count of 17, i.e. larger than
// the 16-byte vector. The FileCheck pattern expects a <16 x i8>
// shufflevector whose mask runs from 0 to 15.
__m128i test_mm_slli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_slli_si128_2
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  return _mm_slli_si128(A, 17);
}
1173*67e74705SXin Li 
// Codegen test for _mm_sqrt_pd. The FileCheck pattern expects a call to
// @llvm.x86.sse2.sqrt.pd on a <2 x double> operand.
__m128d test_mm_sqrt_pd(__m128d A) {
  // CHECK-LABEL: test_mm_sqrt_pd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}})
  return _mm_sqrt_pd(A);
}
1179*67e74705SXin Li 
// Codegen test for _mm_sqrt_sd(A, B). The FileCheck patterns expect a call
// to @llvm.x86.sse2.sqrt.sd, followed by an extractelement/insertelement
// pair for lane 0 and another pair for lane 1 that assemble the result.
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sqrt_sd
  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  return _mm_sqrt_sd(A, B);
}
1189*67e74705SXin Li 
// Codegen test for _mm_sra_epi16(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psra.w taking two <8 x i16> operands.
__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sra_epi16(A, B);
}
1195*67e74705SXin Li 
// Codegen test for _mm_sra_epi32(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psra.d taking two <4 x i32> operands.
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sra_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sra_epi32(A, B);
}
1201*67e74705SXin Li 
// Codegen test for _mm_srai_epi16 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.psrai.w with an <8 x i16> vector
// and an i32 count.
__m128i test_mm_srai_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi16(A, 1);
}
1207*67e74705SXin Li 
// Codegen test for _mm_srai_epi32 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.psrai.d with a <4 x i32> vector
// and an i32 count.
__m128i test_mm_srai_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srai_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srai_epi32(A, 1);
}
1213*67e74705SXin Li 
// Codegen test for _mm_srl_epi16(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psrl.w taking two <8 x i16> operands.
__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_srl_epi16(A, B);
}
1219*67e74705SXin Li 
// Codegen test for _mm_srl_epi32(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psrl.d taking two <4 x i32> operands.
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_srl_epi32(A, B);
}
1225*67e74705SXin Li 
// Codegen test for _mm_srl_epi64(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psrl.q taking two <2 x i64> operands.
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_srl_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_srl_epi64(A, B);
}
1231*67e74705SXin Li 
// Codegen test for _mm_srli_epi16 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.psrli.w with an <8 x i16> vector
// and an i32 count.
__m128i test_mm_srli_epi16(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi16(A, 1);
}
1237*67e74705SXin Li 
// Codegen test for _mm_srli_epi32 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.psrli.d with a <4 x i32> vector
// and an i32 count.
__m128i test_mm_srli_epi32(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi32(A, 1);
}
1243*67e74705SXin Li 
// Codegen test for _mm_srli_epi64 with a shift count of 1. The FileCheck
// pattern expects a call to @llvm.x86.sse2.psrli.q with a <2 x i64> vector
// and an i32 count.
__m128i test_mm_srli_epi64(__m128i A) {
  // CHECK-LABEL: test_mm_srli_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
  return _mm_srli_epi64(A, 1);
}
1249*67e74705SXin Li 
// Codegen test for _mm_srli_si128 with a byte count of 5. The FileCheck
// pattern expects a <16 x i8> shufflevector whose mask runs from 5 to 20.
__m128i test_mm_srli_si128(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return _mm_srli_si128(A, 5);
}
1255*67e74705SXin Li 
// Codegen test for _mm_srli_si128 with a byte count of 17, i.e. larger than
// the 16-byte vector. The FileCheck pattern expects a <16 x i8>
// shufflevector whose mask runs from 16 to 31.
__m128i test_mm_srli_si128_2(__m128i A) {
  // CHECK-LABEL: test_mm_srli_si128_2
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  return _mm_srli_si128(A, 17);
}
1261*67e74705SXin Li 
// Codegen test for _mm_store_pd(A, B). The FileCheck pattern expects a
// <2 x double> store with 16-byte alignment.
void test_mm_store_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store_pd(A, B);
}
1267*67e74705SXin Li 
// Codegen test for _mm_store_pd1(x, y). The FileCheck patterns expect a
// <2 x double> shufflevector with a zeroinitializer mask, followed by a
// <2 x double> store with 16-byte alignment.
void test_mm_store_pd1(double *x, __m128d y) {
  // CHECK-LABEL: test_mm_store_pd1
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
  _mm_store_pd1(x, y);
}
1274*67e74705SXin Li 
// Codegen test for _mm_store_sd(A, B). The FileCheck patterns expect lane 0
// to be extracted from the <2 x double> and stored as a double with
// alignment 1.
void test_mm_store_sd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_store_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_store_sd(A, B);
}
1281*67e74705SXin Li 
// Codegen test for _mm_store_si128(A, B). The FileCheck pattern expects a
// <2 x i64> store with 16-byte alignment.
void test_mm_store_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_store_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
  _mm_store_si128(A, B);
}
1287*67e74705SXin Li 
// Codegen test for _mm_store1_pd(x, y). The FileCheck patterns expect a
// <2 x double> shufflevector with a zeroinitializer mask, followed by a
// <2 x double> store with 16-byte alignment.
void test_mm_store1_pd(double *x, __m128d y) {
  // CHECK-LABEL: test_mm_store1_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
  _mm_store1_pd(x, y);
}
1294*67e74705SXin Li 
// Codegen test for _mm_storeh_pd(A, B). The FileCheck patterns expect lane 1
// to be extracted from the <2 x double> and stored as a double with
// alignment 1.
void test_mm_storeh_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeh_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storeh_pd(A, B);
}
1301*67e74705SXin Li 
// Codegen test for _mm_storel_epi64. Note the wrapper takes (value, pointer)
// while the intrinsic is called as (pointer, value). The FileCheck patterns
// expect lane 0 of the <2 x i64> to be extracted and stored through an i64
// pointer with alignment 1.
void test_mm_storel_epi64(__m128i x, void *y) {
  // CHECK-LABEL: test_mm_storel_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}
1308*67e74705SXin Li 
// Codegen test for _mm_storel_pd(A, B). The FileCheck patterns expect lane 0
// to be extracted from the <2 x double> and stored as a double with
// alignment 1.
void test_mm_storel_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storel_pd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
  _mm_storel_pd(A, B);
}
1315*67e74705SXin Li 
// Codegen test for _mm_storer_pd. Note the wrapper takes (value, pointer)
// while the intrinsic is called as (pointer, value). The FileCheck patterns
// expect a <2 x double> shufflevector with mask <1, 0>, followed by a
// <2 x double> store with 16-byte alignment.
void test_mm_storer_pd(__m128d A, double *B) {
  // CHECK-LABEL: test_mm_storer_pd
  // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(B, A);
}
1322*67e74705SXin Li 
// Codegen test for _mm_storeu_pd(A, B). The FileCheck patterns expect a
// <2 x double> store with alignment 1, immediately followed by `ret void`.
void test_mm_storeu_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_storeu_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_pd(A, B);
}
1329*67e74705SXin Li 
// Codegen test for _mm_storeu_si128(A, B). The FileCheck patterns expect a
// <2 x i64> store with alignment 1, immediately followed by `ret void`.
void test_mm_storeu_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_storeu_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_si128(A, B);
}
1336*67e74705SXin Li 
// Codegen test for _mm_stream_pd(A, B). The FileCheck pattern expects a
// <2 x double> store with 16-byte alignment that carries !nontemporal
// metadata.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
1342*67e74705SXin Li 
// Codegen test for _mm_stream_si32(A, B). The FileCheck pattern expects an
// i32 store with alignment 1 that carries !nontemporal metadata.
void test_mm_stream_si32(int *A, int B) {
  // CHECK-LABEL: test_mm_stream_si32
  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
  _mm_stream_si32(A, B);
}
1348*67e74705SXin Li 
// Codegen test for _mm_stream_si64(A, B). The FileCheck pattern expects an
// i64 store with alignment 1 that carries !nontemporal metadata.
void test_mm_stream_si64(long long *A, long long B) {
  // CHECK-LABEL: test_mm_stream_si64
  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
  _mm_stream_si64(A, B);
}
1354*67e74705SXin Li 
// Codegen test for _mm_stream_si128(A, B). The FileCheck pattern expects a
// <2 x i64> store with 16-byte alignment that carries !nontemporal metadata.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
1360*67e74705SXin Li 
// Codegen test for _mm_sub_epi8(A, B). The FileCheck pattern expects a plain
// `sub` on <16 x i8>.
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi8
  // CHECK: sub <16 x i8>
  return _mm_sub_epi8(A, B);
}
1366*67e74705SXin Li 
// Codegen test for _mm_sub_epi16(A, B). The FileCheck pattern expects a
// plain `sub` on <8 x i16>.
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi16
  // CHECK: sub <8 x i16>
  return _mm_sub_epi16(A, B);
}
1372*67e74705SXin Li 
// Codegen test for _mm_sub_epi32(A, B). The FileCheck pattern expects a
// plain `sub` on <4 x i32>.
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi32
  // CHECK: sub <4 x i32>
  return _mm_sub_epi32(A, B);
}
1378*67e74705SXin Li 
// Codegen test for _mm_sub_epi64(A, B). The FileCheck pattern expects a
// plain `sub` on <2 x i64>.
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_sub_epi64
  // CHECK: sub <2 x i64>
  return _mm_sub_epi64(A, B);
}
1384*67e74705SXin Li 
// Codegen test for _mm_sub_pd(A, B). The FileCheck pattern expects an `fsub`
// on <2 x double>.
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_pd
  // CHECK: fsub <2 x double>
  return _mm_sub_pd(A, B);
}
1390*67e74705SXin Li 
// Codegen test for _mm_sub_sd(A, B). The FileCheck patterns expect lane 0 of
// each operand to be extracted, a scalar `fsub double`, and the difference
// inserted back at index 0 of a <2 x double>.
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_sub_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
  // CHECK: fsub double
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
  return _mm_sub_sd(A, B);
}
1399*67e74705SXin Li 
// Codegen test for _mm_subs_epi8(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psubs.b taking two <16 x i8> operands.
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epi8(A, B);
}
1405*67e74705SXin Li 
// Codegen test for _mm_subs_epi16(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.psubs.w taking two <8 x i16> operands.
__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epi16(A, B);
}
1411*67e74705SXin Li 
// Codegen test for _mm_subs_epu8(A, B). The FileCheck pattern expects a call
// to @llvm.x86.sse2.psubus.b taking two <16 x i8> operands.
__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_subs_epu8(A, B);
}
1417*67e74705SXin Li 
// Codegen test for _mm_subs_epu16(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.psubus.w taking two <8 x i16> operands.
__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_subs_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_subs_epu16(A, B);
}
1423*67e74705SXin Li 
// Codegen test for _mm_ucomieq_sd(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.ucomieq.sd that returns i32 and takes two
// <2 x double> operands.
int test_mm_ucomieq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomieq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomieq_sd(A, B);
}
1429*67e74705SXin Li 
// Codegen test for _mm_ucomige_sd(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.ucomige.sd that returns i32 and takes two
// <2 x double> operands.
int test_mm_ucomige_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomige_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomige_sd(A, B);
}
1435*67e74705SXin Li 
// Codegen test for _mm_ucomigt_sd(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.ucomigt.sd that returns i32 and takes two
// <2 x double> operands.
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomigt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomigt_sd(A, B);
}
1441*67e74705SXin Li 
// Codegen test for _mm_ucomile_sd(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.ucomile.sd that returns i32 and takes two
// <2 x double> operands.
int test_mm_ucomile_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomile_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomile_sd(A, B);
}
1447*67e74705SXin Li 
// Codegen test for _mm_ucomilt_sd(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.ucomilt.sd that returns i32 and takes two
// <2 x double> operands.
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomilt_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomilt_sd(A, B);
}
1453*67e74705SXin Li 
// Codegen test for _mm_ucomineq_sd(A, B). The FileCheck pattern expects a
// call to @llvm.x86.sse2.ucomineq.sd that returns i32 and takes two
// <2 x double> operands.
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_ucomineq_sd
  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_ucomineq_sd(A, B);
}
1459*67e74705SXin Li 
// Codegen test for _mm_undefined_pd. The FileCheck pattern expects the
// function to return an undef <2 x double> directly. The parameter list is
// spelled (void) so the definition also serves as a prototype.
__m128d test_mm_undefined_pd(void) {
  // CHECK-LABEL: @test_mm_undefined_pd
  // CHECK: ret <2 x double> undef
  return _mm_undefined_pd();
}
1465*67e74705SXin Li 
// Codegen test for _mm_undefined_si128. The FileCheck pattern expects the
// function to return an undef <2 x i64> directly. The parameter list is
// spelled (void) so the definition also serves as a prototype.
__m128i test_mm_undefined_si128(void) {
  // CHECK-LABEL: @test_mm_undefined_si128
  // CHECK: ret <2 x i64> undef
  return _mm_undefined_si128();
}
1471*67e74705SXin Li 
// Codegen test for _mm_unpackhi_epi8(A, B). The FileCheck pattern expects a
// <16 x i8> shufflevector whose mask alternates between indices 8..15 and
// indices 24..31.
__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  return _mm_unpackhi_epi8(A, B);
}
1477*67e74705SXin Li 
// Codegen test for _mm_unpackhi_epi16(A, B). The FileCheck pattern expects
// an <8 x i16> shufflevector with mask <4, 12, 5, 13, 6, 14, 7, 15>.
__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  return _mm_unpackhi_epi16(A, B);
}
1483*67e74705SXin Li 
// Codegen test for _mm_unpackhi_epi32(A, B). The FileCheck pattern expects a
// <4 x i32> shufflevector with mask <2, 6, 3, 7>.
__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_epi32(A, B);
}
1489*67e74705SXin Li 
// Codegen test for _mm_unpackhi_epi64(A, B). The FileCheck pattern expects a
// <2 x i64> shufflevector with mask <1, 3>.
__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpackhi_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_epi64(A, B);
}
1495*67e74705SXin Li 
// Codegen test for _mm_unpackhi_pd(A, B). The FileCheck pattern expects a
// <2 x double> shufflevector with mask <1, 3>.
__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpackhi_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
  return _mm_unpackhi_pd(A, B);
}
1501*67e74705SXin Li 
// Codegen test for _mm_unpacklo_epi8(A, B). The FileCheck pattern expects a
// <16 x i8> shufflevector whose mask alternates between indices 0..7 and
// indices 16..23.
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi8
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  return _mm_unpacklo_epi8(A, B);
}
1507*67e74705SXin Li 
// Codegen test for _mm_unpacklo_epi16(A, B). The FileCheck pattern expects
// an <8 x i16> shufflevector with mask <0, 8, 1, 9, 2, 10, 3, 11>.
__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  return _mm_unpacklo_epi16(A, B);
}
1513*67e74705SXin Li 
// Codegen test for _mm_unpacklo_epi32(A, B). The FileCheck pattern expects a
// <4 x i32> shufflevector with mask <0, 4, 1, 5>.
__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_epi32(A, B);
}
1519*67e74705SXin Li 
// Codegen test for _mm_unpacklo_epi64(A, B). The FileCheck pattern expects a
// <2 x i64> shufflevector with mask <0, 2>.
__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_unpacklo_epi64
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_epi64(A, B);
}
1525*67e74705SXin Li 
// Codegen test for _mm_unpacklo_pd(A, B). The FileCheck pattern expects a
// <2 x double> shufflevector with mask <0, 2>.
__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
  return _mm_unpacklo_pd(A, B);
}
1531*67e74705SXin Li 
// Codegen test for _mm_xor_pd(A, B). The FileCheck pattern expects the
// operation to be emitted as an integer `xor` on <4 x i32>, not on
// <2 x double>.
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_xor_pd
  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
  return _mm_xor_pd(A, B);
}
1537*67e74705SXin Li 
// Codegen test for _mm_xor_si128(A, B). The FileCheck pattern expects an
// `xor` on <2 x i64>.
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_xor_si128
  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
  return _mm_xor_si128(A, B);
}
1543