xref: /aosp_15_r20/external/clang/lib/Headers/xopintrin.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li /*===---- xopintrin.h - XOP intrinsics -------------------------------------===
2*67e74705SXin Li  *
3*67e74705SXin Li  * Permission is hereby granted, free of charge, to any person obtaining a copy
4*67e74705SXin Li  * of this software and associated documentation files (the "Software"), to deal
5*67e74705SXin Li  * in the Software without restriction, including without limitation the rights
6*67e74705SXin Li  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7*67e74705SXin Li  * copies of the Software, and to permit persons to whom the Software is
8*67e74705SXin Li  * furnished to do so, subject to the following conditions:
9*67e74705SXin Li  *
10*67e74705SXin Li  * The above copyright notice and this permission notice shall be included in
11*67e74705SXin Li  * all copies or substantial portions of the Software.
12*67e74705SXin Li  *
13*67e74705SXin Li  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14*67e74705SXin Li  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15*67e74705SXin Li  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16*67e74705SXin Li  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17*67e74705SXin Li  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18*67e74705SXin Li  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19*67e74705SXin Li  * THE SOFTWARE.
20*67e74705SXin Li  *
21*67e74705SXin Li  *===-----------------------------------------------------------------------===
22*67e74705SXin Li  */
23*67e74705SXin Li 
24*67e74705SXin Li #ifndef __X86INTRIN_H
25*67e74705SXin Li #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26*67e74705SXin Li #endif
27*67e74705SXin Li 
28*67e74705SXin Li #ifndef __XOPINTRIN_H
29*67e74705SXin Li #define __XOPINTRIN_H
30*67e74705SXin Li 
31*67e74705SXin Li #include <fma4intrin.h>
32*67e74705SXin Li 
/* Attribute set applied to every intrinsic function defined in this header:
 * __always_inline__, __nodebug__, and __target__("xop"). */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("xop")))
35*67e74705SXin Li 
36*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A,__m128i __B,__m128i __C)37*67e74705SXin Li _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
38*67e74705SXin Li {
39*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
40*67e74705SXin Li }
41*67e74705SXin Li 
42*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi16(__m128i __A,__m128i __B,__m128i __C)43*67e74705SXin Li _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
44*67e74705SXin Li {
45*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
46*67e74705SXin Li }
47*67e74705SXin Li 
48*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccsd_epi16(__m128i __A,__m128i __B,__m128i __C)49*67e74705SXin Li _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
50*67e74705SXin Li {
51*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
52*67e74705SXin Li }
53*67e74705SXin Li 
54*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccd_epi16(__m128i __A,__m128i __B,__m128i __C)55*67e74705SXin Li _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
56*67e74705SXin Li {
57*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
58*67e74705SXin Li }
59*67e74705SXin Li 
60*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi32(__m128i __A,__m128i __B,__m128i __C)61*67e74705SXin Li _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
62*67e74705SXin Li {
63*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
64*67e74705SXin Li }
65*67e74705SXin Li 
66*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi32(__m128i __A,__m128i __B,__m128i __C)67*67e74705SXin Li _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
68*67e74705SXin Li {
69*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
70*67e74705SXin Li }
71*67e74705SXin Li 
72*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccslo_epi32(__m128i __A,__m128i __B,__m128i __C)73*67e74705SXin Li _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
74*67e74705SXin Li {
75*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
76*67e74705SXin Li }
77*67e74705SXin Li 
78*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macclo_epi32(__m128i __A,__m128i __B,__m128i __C)79*67e74705SXin Li _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
80*67e74705SXin Li {
81*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
82*67e74705SXin Li }
83*67e74705SXin Li 
84*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccshi_epi32(__m128i __A,__m128i __B,__m128i __C)85*67e74705SXin Li _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
86*67e74705SXin Li {
87*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
88*67e74705SXin Li }
89*67e74705SXin Li 
90*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macchi_epi32(__m128i __A,__m128i __B,__m128i __C)91*67e74705SXin Li _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
92*67e74705SXin Li {
93*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
94*67e74705SXin Li }
95*67e74705SXin Li 
96*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddsd_epi16(__m128i __A,__m128i __B,__m128i __C)97*67e74705SXin Li _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
98*67e74705SXin Li {
99*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
100*67e74705SXin Li }
101*67e74705SXin Li 
102*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddd_epi16(__m128i __A,__m128i __B,__m128i __C)103*67e74705SXin Li _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
104*67e74705SXin Li {
105*67e74705SXin Li   return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
106*67e74705SXin Li }
107*67e74705SXin Li 
108*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epi8(__m128i __A)109*67e74705SXin Li _mm_haddw_epi8(__m128i __A)
110*67e74705SXin Li {
111*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
112*67e74705SXin Li }
113*67e74705SXin Li 
114*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi8(__m128i __A)115*67e74705SXin Li _mm_haddd_epi8(__m128i __A)
116*67e74705SXin Li {
117*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
118*67e74705SXin Li }
119*67e74705SXin Li 
120*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi8(__m128i __A)121*67e74705SXin Li _mm_haddq_epi8(__m128i __A)
122*67e74705SXin Li {
123*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
124*67e74705SXin Li }
125*67e74705SXin Li 
126*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi16(__m128i __A)127*67e74705SXin Li _mm_haddd_epi16(__m128i __A)
128*67e74705SXin Li {
129*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
130*67e74705SXin Li }
131*67e74705SXin Li 
132*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi16(__m128i __A)133*67e74705SXin Li _mm_haddq_epi16(__m128i __A)
134*67e74705SXin Li {
135*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
136*67e74705SXin Li }
137*67e74705SXin Li 
138*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi32(__m128i __A)139*67e74705SXin Li _mm_haddq_epi32(__m128i __A)
140*67e74705SXin Li {
141*67e74705SXin Li   return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
142*67e74705SXin Li }
143*67e74705SXin Li 
144*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epu8(__m128i __A)145*67e74705SXin Li _mm_haddw_epu8(__m128i __A)
146*67e74705SXin Li {
147*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
148*67e74705SXin Li }
149*67e74705SXin Li 
150*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu8(__m128i __A)151*67e74705SXin Li _mm_haddd_epu8(__m128i __A)
152*67e74705SXin Li {
153*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
154*67e74705SXin Li }
155*67e74705SXin Li 
156*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu8(__m128i __A)157*67e74705SXin Li _mm_haddq_epu8(__m128i __A)
158*67e74705SXin Li {
159*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
160*67e74705SXin Li }
161*67e74705SXin Li 
162*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu16(__m128i __A)163*67e74705SXin Li _mm_haddd_epu16(__m128i __A)
164*67e74705SXin Li {
165*67e74705SXin Li   return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
166*67e74705SXin Li }
167*67e74705SXin Li 
168*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu16(__m128i __A)169*67e74705SXin Li _mm_haddq_epu16(__m128i __A)
170*67e74705SXin Li {
171*67e74705SXin Li   return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
172*67e74705SXin Li }
173*67e74705SXin Li 
174*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu32(__m128i __A)175*67e74705SXin Li _mm_haddq_epu32(__m128i __A)
176*67e74705SXin Li {
177*67e74705SXin Li   return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
178*67e74705SXin Li }
179*67e74705SXin Li 
180*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubw_epi8(__m128i __A)181*67e74705SXin Li _mm_hsubw_epi8(__m128i __A)
182*67e74705SXin Li {
183*67e74705SXin Li   return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
184*67e74705SXin Li }
185*67e74705SXin Li 
186*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubd_epi16(__m128i __A)187*67e74705SXin Li _mm_hsubd_epi16(__m128i __A)
188*67e74705SXin Li {
189*67e74705SXin Li   return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
190*67e74705SXin Li }
191*67e74705SXin Li 
192*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubq_epi32(__m128i __A)193*67e74705SXin Li _mm_hsubq_epi32(__m128i __A)
194*67e74705SXin Li {
195*67e74705SXin Li   return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
196*67e74705SXin Li }
197*67e74705SXin Li 
198*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A,__m128i __B,__m128i __C)199*67e74705SXin Li _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
200*67e74705SXin Li {
201*67e74705SXin Li   return (__m128i)__builtin_ia32_vpcmov((__v2di)__A, (__v2di)__B, (__v2di)__C);
202*67e74705SXin Li }
203*67e74705SXin Li 
204*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmov_si256(__m256i __A,__m256i __B,__m256i __C)205*67e74705SXin Li _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
206*67e74705SXin Li {
207*67e74705SXin Li   return (__m256i)__builtin_ia32_vpcmov_256((__v4di)__A, (__v4di)__B, (__v4di)__C);
208*67e74705SXin Li }
209*67e74705SXin Li 
210*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_perm_epi8(__m128i __A,__m128i __B,__m128i __C)211*67e74705SXin Li _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
212*67e74705SXin Li {
213*67e74705SXin Li   return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
214*67e74705SXin Li }
215*67e74705SXin Li 
216*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A,__m128i __B)217*67e74705SXin Li _mm_rot_epi8(__m128i __A, __m128i __B)
218*67e74705SXin Li {
219*67e74705SXin Li   return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
220*67e74705SXin Li }
221*67e74705SXin Li 
222*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A,__m128i __B)223*67e74705SXin Li _mm_rot_epi16(__m128i __A, __m128i __B)
224*67e74705SXin Li {
225*67e74705SXin Li   return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
226*67e74705SXin Li }
227*67e74705SXin Li 
228*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A,__m128i __B)229*67e74705SXin Li _mm_rot_epi32(__m128i __A, __m128i __B)
230*67e74705SXin Li {
231*67e74705SXin Li   return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
232*67e74705SXin Li }
233*67e74705SXin Li 
234*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A,__m128i __B)235*67e74705SXin Li _mm_rot_epi64(__m128i __A, __m128i __B)
236*67e74705SXin Li {
237*67e74705SXin Li   return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
238*67e74705SXin Li }
239*67e74705SXin Li 
/* Expands to __builtin_ia32_vprotbi applied to A (cast through __m128i to
 * __v16qi) and N, with the result cast to __m128i.
 * NOTE(review): N is presumably required to be a compile-time constant - confirm. */
#define _mm_roti_epi8(A, N) \
  __extension__ ({ (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); })
242*67e74705SXin Li 
/* Expands to __builtin_ia32_vprotwi applied to A (cast through __m128i to
 * __v8hi) and N, with the result cast to __m128i.
 * NOTE(review): N is presumably required to be a compile-time constant - confirm. */
#define _mm_roti_epi16(A, N) \
  __extension__ ({ (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); })
245*67e74705SXin Li 
/* Expands to __builtin_ia32_vprotdi applied to A (cast through __m128i to
 * __v4si) and N, with the result cast to __m128i.
 * NOTE(review): N is presumably required to be a compile-time constant - confirm. */
#define _mm_roti_epi32(A, N) \
  __extension__ ({ (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); })
248*67e74705SXin Li 
/* Expands to __builtin_ia32_vprotqi applied to A (cast through __m128i to
 * __v2di) and N, with the result cast to __m128i.
 * NOTE(review): N is presumably required to be a compile-time constant - confirm. */
#define _mm_roti_epi64(A, N) \
  __extension__ ({ (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
251*67e74705SXin Li 
252*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A,__m128i __B)253*67e74705SXin Li _mm_shl_epi8(__m128i __A, __m128i __B)
254*67e74705SXin Li {
255*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
256*67e74705SXin Li }
257*67e74705SXin Li 
258*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi16(__m128i __A,__m128i __B)259*67e74705SXin Li _mm_shl_epi16(__m128i __A, __m128i __B)
260*67e74705SXin Li {
261*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
262*67e74705SXin Li }
263*67e74705SXin Li 
264*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi32(__m128i __A,__m128i __B)265*67e74705SXin Li _mm_shl_epi32(__m128i __A, __m128i __B)
266*67e74705SXin Li {
267*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
268*67e74705SXin Li }
269*67e74705SXin Li 
270*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi64(__m128i __A,__m128i __B)271*67e74705SXin Li _mm_shl_epi64(__m128i __A, __m128i __B)
272*67e74705SXin Li {
273*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
274*67e74705SXin Li }
275*67e74705SXin Li 
276*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi8(__m128i __A,__m128i __B)277*67e74705SXin Li _mm_sha_epi8(__m128i __A, __m128i __B)
278*67e74705SXin Li {
279*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
280*67e74705SXin Li }
281*67e74705SXin Li 
282*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi16(__m128i __A,__m128i __B)283*67e74705SXin Li _mm_sha_epi16(__m128i __A, __m128i __B)
284*67e74705SXin Li {
285*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
286*67e74705SXin Li }
287*67e74705SXin Li 
288*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi32(__m128i __A,__m128i __B)289*67e74705SXin Li _mm_sha_epi32(__m128i __A, __m128i __B)
290*67e74705SXin Li {
291*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
292*67e74705SXin Li }
293*67e74705SXin Li 
294*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi64(__m128i __A,__m128i __B)295*67e74705SXin Li _mm_sha_epi64(__m128i __A, __m128i __B)
296*67e74705SXin Li {
297*67e74705SXin Li   return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
298*67e74705SXin Li }
299*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomub on A and B (each cast through __m128i to
 * __v16qi) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epu8(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                    (__v16qi)(__m128i)(B), \
                                    (N)); })
303*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomuw on A and B (each cast through __m128i to
 * __v8hi) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epu16(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                    (__v8hi)(__m128i)(B), \
                                    (N)); })
307*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomud on A and B (each cast through __m128i to
 * __v4si) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epu32(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                    (__v4si)(__m128i)(B), \
                                    (N)); })
311*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomuq on A and B (each cast through __m128i to
 * __v2di) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epu64(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                    (__v2di)(__m128i)(B), \
                                    (N)); })
315*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomb on A and B (each cast through __m128i to
 * __v16qi) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epi8(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), \
                                   (N)); })
319*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomw on A and B (each cast through __m128i to
 * __v8hi) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epi16(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), \
                                   (N)); })
323*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomd on A and B (each cast through __m128i to
 * __v4si) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epi32(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), \
                                   (N)); })
327*67e74705SXin Li 
/* Expands to __builtin_ia32_vpcomq on A and B (each cast through __m128i to
 * __v2di) with N as the third argument; the result is cast to __m128i.
 * NOTE(review): N is presumably one of the _MM_PCOMCTRL_* values - confirm. */
#define _mm_com_epi64(A, B, N) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), \
                                   (N)); })
331*67e74705SXin Li 
/* Selector values passed as the last argument of the _mm_com_* macros by the
 * _mm_com<op>_* wrapper functions in this header.
 * NOTE(review): the per-value meanings below are read off the names - confirm. */
#define _MM_PCOMCTRL_LT 0    /* "less than" */
#define _MM_PCOMCTRL_LE 1    /* "less than or equal" */
#define _MM_PCOMCTRL_GT 2    /* "greater than" */
#define _MM_PCOMCTRL_GE 3    /* "greater than or equal" */
#define _MM_PCOMCTRL_EQ 4    /* "equal" */
#define _MM_PCOMCTRL_NEQ 5   /* "not equal" */
#define _MM_PCOMCTRL_FALSE 6 /* "false" */
#define _MM_PCOMCTRL_TRUE 7  /* "true" */
340*67e74705SXin Li 
341*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu8(__m128i __A,__m128i __B)342*67e74705SXin Li _mm_comlt_epu8(__m128i __A, __m128i __B)
343*67e74705SXin Li {
344*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
345*67e74705SXin Li }
346*67e74705SXin Li 
347*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu8(__m128i __A,__m128i __B)348*67e74705SXin Li _mm_comle_epu8(__m128i __A, __m128i __B)
349*67e74705SXin Li {
350*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
351*67e74705SXin Li }
352*67e74705SXin Li 
353*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu8(__m128i __A,__m128i __B)354*67e74705SXin Li _mm_comgt_epu8(__m128i __A, __m128i __B)
355*67e74705SXin Li {
356*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
357*67e74705SXin Li }
358*67e74705SXin Li 
359*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu8(__m128i __A,__m128i __B)360*67e74705SXin Li _mm_comge_epu8(__m128i __A, __m128i __B)
361*67e74705SXin Li {
362*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
363*67e74705SXin Li }
364*67e74705SXin Li 
365*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu8(__m128i __A,__m128i __B)366*67e74705SXin Li _mm_comeq_epu8(__m128i __A, __m128i __B)
367*67e74705SXin Li {
368*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
369*67e74705SXin Li }
370*67e74705SXin Li 
371*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu8(__m128i __A,__m128i __B)372*67e74705SXin Li _mm_comneq_epu8(__m128i __A, __m128i __B)
373*67e74705SXin Li {
374*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
375*67e74705SXin Li }
376*67e74705SXin Li 
377*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu8(__m128i __A,__m128i __B)378*67e74705SXin Li _mm_comfalse_epu8(__m128i __A, __m128i __B)
379*67e74705SXin Li {
380*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
381*67e74705SXin Li }
382*67e74705SXin Li 
383*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu8(__m128i __A,__m128i __B)384*67e74705SXin Li _mm_comtrue_epu8(__m128i __A, __m128i __B)
385*67e74705SXin Li {
386*67e74705SXin Li   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
387*67e74705SXin Li }
388*67e74705SXin Li 
389*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu16(__m128i __A,__m128i __B)390*67e74705SXin Li _mm_comlt_epu16(__m128i __A, __m128i __B)
391*67e74705SXin Li {
392*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
393*67e74705SXin Li }
394*67e74705SXin Li 
395*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu16(__m128i __A,__m128i __B)396*67e74705SXin Li _mm_comle_epu16(__m128i __A, __m128i __B)
397*67e74705SXin Li {
398*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
399*67e74705SXin Li }
400*67e74705SXin Li 
401*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu16(__m128i __A,__m128i __B)402*67e74705SXin Li _mm_comgt_epu16(__m128i __A, __m128i __B)
403*67e74705SXin Li {
404*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
405*67e74705SXin Li }
406*67e74705SXin Li 
407*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu16(__m128i __A,__m128i __B)408*67e74705SXin Li _mm_comge_epu16(__m128i __A, __m128i __B)
409*67e74705SXin Li {
410*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
411*67e74705SXin Li }
412*67e74705SXin Li 
413*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu16(__m128i __A,__m128i __B)414*67e74705SXin Li _mm_comeq_epu16(__m128i __A, __m128i __B)
415*67e74705SXin Li {
416*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
417*67e74705SXin Li }
418*67e74705SXin Li 
419*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu16(__m128i __A,__m128i __B)420*67e74705SXin Li _mm_comneq_epu16(__m128i __A, __m128i __B)
421*67e74705SXin Li {
422*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
423*67e74705SXin Li }
424*67e74705SXin Li 
425*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu16(__m128i __A,__m128i __B)426*67e74705SXin Li _mm_comfalse_epu16(__m128i __A, __m128i __B)
427*67e74705SXin Li {
428*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
429*67e74705SXin Li }
430*67e74705SXin Li 
431*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu16(__m128i __A,__m128i __B)432*67e74705SXin Li _mm_comtrue_epu16(__m128i __A, __m128i __B)
433*67e74705SXin Li {
434*67e74705SXin Li   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
435*67e74705SXin Li }
436*67e74705SXin Li 
437*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu32(__m128i __A,__m128i __B)438*67e74705SXin Li _mm_comlt_epu32(__m128i __A, __m128i __B)
439*67e74705SXin Li {
440*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
441*67e74705SXin Li }
442*67e74705SXin Li 
443*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu32(__m128i __A,__m128i __B)444*67e74705SXin Li _mm_comle_epu32(__m128i __A, __m128i __B)
445*67e74705SXin Li {
446*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
447*67e74705SXin Li }
448*67e74705SXin Li 
449*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu32(__m128i __A,__m128i __B)450*67e74705SXin Li _mm_comgt_epu32(__m128i __A, __m128i __B)
451*67e74705SXin Li {
452*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
453*67e74705SXin Li }
454*67e74705SXin Li 
455*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu32(__m128i __A,__m128i __B)456*67e74705SXin Li _mm_comge_epu32(__m128i __A, __m128i __B)
457*67e74705SXin Li {
458*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
459*67e74705SXin Li }
460*67e74705SXin Li 
461*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu32(__m128i __A,__m128i __B)462*67e74705SXin Li _mm_comeq_epu32(__m128i __A, __m128i __B)
463*67e74705SXin Li {
464*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
465*67e74705SXin Li }
466*67e74705SXin Li 
467*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu32(__m128i __A,__m128i __B)468*67e74705SXin Li _mm_comneq_epu32(__m128i __A, __m128i __B)
469*67e74705SXin Li {
470*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
471*67e74705SXin Li }
472*67e74705SXin Li 
473*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu32(__m128i __A,__m128i __B)474*67e74705SXin Li _mm_comfalse_epu32(__m128i __A, __m128i __B)
475*67e74705SXin Li {
476*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
477*67e74705SXin Li }
478*67e74705SXin Li 
479*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu32(__m128i __A,__m128i __B)480*67e74705SXin Li _mm_comtrue_epu32(__m128i __A, __m128i __B)
481*67e74705SXin Li {
482*67e74705SXin Li   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
483*67e74705SXin Li }
484*67e74705SXin Li 
485*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu64(__m128i __A,__m128i __B)486*67e74705SXin Li _mm_comlt_epu64(__m128i __A, __m128i __B)
487*67e74705SXin Li {
488*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
489*67e74705SXin Li }
490*67e74705SXin Li 
491*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu64(__m128i __A,__m128i __B)492*67e74705SXin Li _mm_comle_epu64(__m128i __A, __m128i __B)
493*67e74705SXin Li {
494*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
495*67e74705SXin Li }
496*67e74705SXin Li 
497*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu64(__m128i __A,__m128i __B)498*67e74705SXin Li _mm_comgt_epu64(__m128i __A, __m128i __B)
499*67e74705SXin Li {
500*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
501*67e74705SXin Li }
502*67e74705SXin Li 
503*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu64(__m128i __A,__m128i __B)504*67e74705SXin Li _mm_comge_epu64(__m128i __A, __m128i __B)
505*67e74705SXin Li {
506*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
507*67e74705SXin Li }
508*67e74705SXin Li 
509*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu64(__m128i __A,__m128i __B)510*67e74705SXin Li _mm_comeq_epu64(__m128i __A, __m128i __B)
511*67e74705SXin Li {
512*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
513*67e74705SXin Li }
514*67e74705SXin Li 
515*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu64(__m128i __A,__m128i __B)516*67e74705SXin Li _mm_comneq_epu64(__m128i __A, __m128i __B)
517*67e74705SXin Li {
518*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
519*67e74705SXin Li }
520*67e74705SXin Li 
521*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu64(__m128i __A,__m128i __B)522*67e74705SXin Li _mm_comfalse_epu64(__m128i __A, __m128i __B)
523*67e74705SXin Li {
524*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
525*67e74705SXin Li }
526*67e74705SXin Li 
527*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu64(__m128i __A,__m128i __B)528*67e74705SXin Li _mm_comtrue_epu64(__m128i __A, __m128i __B)
529*67e74705SXin Li {
530*67e74705SXin Li   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
531*67e74705SXin Li }
532*67e74705SXin Li 
533*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi8(__m128i __A,__m128i __B)534*67e74705SXin Li _mm_comlt_epi8(__m128i __A, __m128i __B)
535*67e74705SXin Li {
536*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
537*67e74705SXin Li }
538*67e74705SXin Li 
539*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi8(__m128i __A,__m128i __B)540*67e74705SXin Li _mm_comle_epi8(__m128i __A, __m128i __B)
541*67e74705SXin Li {
542*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
543*67e74705SXin Li }
544*67e74705SXin Li 
545*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi8(__m128i __A,__m128i __B)546*67e74705SXin Li _mm_comgt_epi8(__m128i __A, __m128i __B)
547*67e74705SXin Li {
548*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
549*67e74705SXin Li }
550*67e74705SXin Li 
551*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi8(__m128i __A,__m128i __B)552*67e74705SXin Li _mm_comge_epi8(__m128i __A, __m128i __B)
553*67e74705SXin Li {
554*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
555*67e74705SXin Li }
556*67e74705SXin Li 
557*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi8(__m128i __A,__m128i __B)558*67e74705SXin Li _mm_comeq_epi8(__m128i __A, __m128i __B)
559*67e74705SXin Li {
560*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
561*67e74705SXin Li }
562*67e74705SXin Li 
563*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi8(__m128i __A,__m128i __B)564*67e74705SXin Li _mm_comneq_epi8(__m128i __A, __m128i __B)
565*67e74705SXin Li {
566*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
567*67e74705SXin Li }
568*67e74705SXin Li 
569*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi8(__m128i __A,__m128i __B)570*67e74705SXin Li _mm_comfalse_epi8(__m128i __A, __m128i __B)
571*67e74705SXin Li {
572*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
573*67e74705SXin Li }
574*67e74705SXin Li 
575*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi8(__m128i __A,__m128i __B)576*67e74705SXin Li _mm_comtrue_epi8(__m128i __A, __m128i __B)
577*67e74705SXin Li {
578*67e74705SXin Li   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
579*67e74705SXin Li }
580*67e74705SXin Li 
581*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi16(__m128i __A,__m128i __B)582*67e74705SXin Li _mm_comlt_epi16(__m128i __A, __m128i __B)
583*67e74705SXin Li {
584*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
585*67e74705SXin Li }
586*67e74705SXin Li 
587*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi16(__m128i __A,__m128i __B)588*67e74705SXin Li _mm_comle_epi16(__m128i __A, __m128i __B)
589*67e74705SXin Li {
590*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
591*67e74705SXin Li }
592*67e74705SXin Li 
593*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi16(__m128i __A,__m128i __B)594*67e74705SXin Li _mm_comgt_epi16(__m128i __A, __m128i __B)
595*67e74705SXin Li {
596*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
597*67e74705SXin Li }
598*67e74705SXin Li 
599*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi16(__m128i __A,__m128i __B)600*67e74705SXin Li _mm_comge_epi16(__m128i __A, __m128i __B)
601*67e74705SXin Li {
602*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
603*67e74705SXin Li }
604*67e74705SXin Li 
605*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi16(__m128i __A,__m128i __B)606*67e74705SXin Li _mm_comeq_epi16(__m128i __A, __m128i __B)
607*67e74705SXin Li {
608*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
609*67e74705SXin Li }
610*67e74705SXin Li 
611*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi16(__m128i __A,__m128i __B)612*67e74705SXin Li _mm_comneq_epi16(__m128i __A, __m128i __B)
613*67e74705SXin Li {
614*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
615*67e74705SXin Li }
616*67e74705SXin Li 
617*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi16(__m128i __A,__m128i __B)618*67e74705SXin Li _mm_comfalse_epi16(__m128i __A, __m128i __B)
619*67e74705SXin Li {
620*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
621*67e74705SXin Li }
622*67e74705SXin Li 
623*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi16(__m128i __A,__m128i __B)624*67e74705SXin Li _mm_comtrue_epi16(__m128i __A, __m128i __B)
625*67e74705SXin Li {
626*67e74705SXin Li   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
627*67e74705SXin Li }
628*67e74705SXin Li 
629*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi32(__m128i __A,__m128i __B)630*67e74705SXin Li _mm_comlt_epi32(__m128i __A, __m128i __B)
631*67e74705SXin Li {
632*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
633*67e74705SXin Li }
634*67e74705SXin Li 
635*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi32(__m128i __A,__m128i __B)636*67e74705SXin Li _mm_comle_epi32(__m128i __A, __m128i __B)
637*67e74705SXin Li {
638*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
639*67e74705SXin Li }
640*67e74705SXin Li 
641*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi32(__m128i __A,__m128i __B)642*67e74705SXin Li _mm_comgt_epi32(__m128i __A, __m128i __B)
643*67e74705SXin Li {
644*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
645*67e74705SXin Li }
646*67e74705SXin Li 
647*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi32(__m128i __A,__m128i __B)648*67e74705SXin Li _mm_comge_epi32(__m128i __A, __m128i __B)
649*67e74705SXin Li {
650*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
651*67e74705SXin Li }
652*67e74705SXin Li 
653*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi32(__m128i __A,__m128i __B)654*67e74705SXin Li _mm_comeq_epi32(__m128i __A, __m128i __B)
655*67e74705SXin Li {
656*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
657*67e74705SXin Li }
658*67e74705SXin Li 
659*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi32(__m128i __A,__m128i __B)660*67e74705SXin Li _mm_comneq_epi32(__m128i __A, __m128i __B)
661*67e74705SXin Li {
662*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
663*67e74705SXin Li }
664*67e74705SXin Li 
665*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi32(__m128i __A,__m128i __B)666*67e74705SXin Li _mm_comfalse_epi32(__m128i __A, __m128i __B)
667*67e74705SXin Li {
668*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
669*67e74705SXin Li }
670*67e74705SXin Li 
671*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi32(__m128i __A,__m128i __B)672*67e74705SXin Li _mm_comtrue_epi32(__m128i __A, __m128i __B)
673*67e74705SXin Li {
674*67e74705SXin Li   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
675*67e74705SXin Li }
676*67e74705SXin Li 
677*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi64(__m128i __A,__m128i __B)678*67e74705SXin Li _mm_comlt_epi64(__m128i __A, __m128i __B)
679*67e74705SXin Li {
680*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
681*67e74705SXin Li }
682*67e74705SXin Li 
683*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi64(__m128i __A,__m128i __B)684*67e74705SXin Li _mm_comle_epi64(__m128i __A, __m128i __B)
685*67e74705SXin Li {
686*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
687*67e74705SXin Li }
688*67e74705SXin Li 
689*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi64(__m128i __A,__m128i __B)690*67e74705SXin Li _mm_comgt_epi64(__m128i __A, __m128i __B)
691*67e74705SXin Li {
692*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
693*67e74705SXin Li }
694*67e74705SXin Li 
695*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi64(__m128i __A,__m128i __B)696*67e74705SXin Li _mm_comge_epi64(__m128i __A, __m128i __B)
697*67e74705SXin Li {
698*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
699*67e74705SXin Li }
700*67e74705SXin Li 
701*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi64(__m128i __A,__m128i __B)702*67e74705SXin Li _mm_comeq_epi64(__m128i __A, __m128i __B)
703*67e74705SXin Li {
704*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
705*67e74705SXin Li }
706*67e74705SXin Li 
707*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi64(__m128i __A,__m128i __B)708*67e74705SXin Li _mm_comneq_epi64(__m128i __A, __m128i __B)
709*67e74705SXin Li {
710*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
711*67e74705SXin Li }
712*67e74705SXin Li 
713*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi64(__m128i __A,__m128i __B)714*67e74705SXin Li _mm_comfalse_epi64(__m128i __A, __m128i __B)
715*67e74705SXin Li {
716*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
717*67e74705SXin Li }
718*67e74705SXin Li 
719*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi64(__m128i __A,__m128i __B)720*67e74705SXin Li _mm_comtrue_epi64(__m128i __A, __m128i __B)
721*67e74705SXin Li {
722*67e74705SXin Li   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
723*67e74705SXin Li }
724*67e74705SXin Li 
/* Statement-expression wrapper around __builtin_ia32_vpermil2pd.
 * X and Y are cast to __m128d and then __v2df, C to __m128i and then
 * __v2di, and I is passed through as written; the builtin's result is cast
 * to __m128d.
 * NOTE(review): I presumably has to be an integer constant expression
 * (instruction immediate) -- confirm against the builtin's definition. */
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
  (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                     (__v2df)(__m128d)(Y), \
                                     (__v2di)(__m128i)(C), (I)); })
729*67e74705SXin Li 
/* Statement-expression wrapper around __builtin_ia32_vpermil2pd256.
 * X and Y are cast to __m256d and then __v4df, C to __m256i and then
 * __v4di, and I is passed through as written; the builtin's result is cast
 * to __m256d.
 * NOTE(review): I presumably has to be an integer constant expression
 * (instruction immediate) -- confirm against the builtin's definition. */
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
  (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                        (__v4df)(__m256d)(Y), \
                                        (__v4di)(__m256i)(C), (I)); })
734*67e74705SXin Li 
/* Statement-expression wrapper around __builtin_ia32_vpermil2ps.
 * X and Y are cast to __m128 and then __v4sf, C to __m128i and then
 * __v4si, and I is passed through as written; the builtin's result is cast
 * to __m128.
 * NOTE(review): I presumably has to be an integer constant expression
 * (instruction immediate) -- confirm against the builtin's definition. */
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
  (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
                                    (__v4si)(__m128i)(C), (I)); })
738*67e74705SXin Li 
/* Statement-expression wrapper around __builtin_ia32_vpermil2ps256.
 * X and Y are cast to __m256 and then __v8sf, C to __m256i and then
 * __v8si, and I is passed through as written; the builtin's result is cast
 * to __m256.
 * NOTE(review): I presumably has to be an integer constant expression
 * (instruction immediate) -- confirm against the builtin's definition. */
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
  (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                       (__v8sf)(__m256)(Y), \
                                       (__v8si)(__m256i)(C), (I)); })
743*67e74705SXin Li 
744*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)745*67e74705SXin Li _mm_frcz_ss(__m128 __A)
746*67e74705SXin Li {
747*67e74705SXin Li   return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
748*67e74705SXin Li }
749*67e74705SXin Li 
750*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_sd(__m128d __A)751*67e74705SXin Li _mm_frcz_sd(__m128d __A)
752*67e74705SXin Li {
753*67e74705SXin Li   return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
754*67e74705SXin Li }
755*67e74705SXin Li 
756*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ps(__m128 __A)757*67e74705SXin Li _mm_frcz_ps(__m128 __A)
758*67e74705SXin Li {
759*67e74705SXin Li   return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
760*67e74705SXin Li }
761*67e74705SXin Li 
762*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_pd(__m128d __A)763*67e74705SXin Li _mm_frcz_pd(__m128d __A)
764*67e74705SXin Li {
765*67e74705SXin Li   return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
766*67e74705SXin Li }
767*67e74705SXin Li 
768*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_frcz_ps(__m256 __A)769*67e74705SXin Li _mm256_frcz_ps(__m256 __A)
770*67e74705SXin Li {
771*67e74705SXin Li   return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
772*67e74705SXin Li }
773*67e74705SXin Li 
774*67e74705SXin Li static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_frcz_pd(__m256d __A)775*67e74705SXin Li _mm256_frcz_pd(__m256d __A)
776*67e74705SXin Li {
777*67e74705SXin Li   return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
778*67e74705SXin Li }
779*67e74705SXin Li 
/* Retire the attribute shorthand used by the definitions above so the macro
 * is no longer defined past this point. */
#undef __DEFAULT_FN_ATTRS
781*67e74705SXin Li 
#endif /* __XOPINTRIN_H */