1*67e74705SXin Li /*===---- immintrin.h - Intel intrinsics -----------------------------------===
2*67e74705SXin Li *
3*67e74705SXin Li * Permission is hereby granted, free of charge, to any person obtaining a copy
4*67e74705SXin Li * of this software and associated documentation files (the "Software"), to deal
5*67e74705SXin Li * in the Software without restriction, including without limitation the rights
6*67e74705SXin Li * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7*67e74705SXin Li * copies of the Software, and to permit persons to whom the Software is
8*67e74705SXin Li * furnished to do so, subject to the following conditions:
9*67e74705SXin Li *
10*67e74705SXin Li * The above copyright notice and this permission notice shall be included in
11*67e74705SXin Li * all copies or substantial portions of the Software.
12*67e74705SXin Li *
13*67e74705SXin Li * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14*67e74705SXin Li * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15*67e74705SXin Li * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16*67e74705SXin Li * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17*67e74705SXin Li * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18*67e74705SXin Li * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19*67e74705SXin Li * THE SOFTWARE.
20*67e74705SXin Li *
21*67e74705SXin Li *===-----------------------------------------------------------------------===
22*67e74705SXin Li */
23*67e74705SXin Li
24*67e74705SXin Li #ifndef __IMMINTRIN_H
25*67e74705SXin Li #define __IMMINTRIN_H
26*67e74705SXin Li
27*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
28*67e74705SXin Li #include <mmintrin.h>
29*67e74705SXin Li #endif
30*67e74705SXin Li
31*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
32*67e74705SXin Li #include <xmmintrin.h>
33*67e74705SXin Li #endif
34*67e74705SXin Li
35*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
36*67e74705SXin Li #include <emmintrin.h>
37*67e74705SXin Li #endif
38*67e74705SXin Li
39*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
40*67e74705SXin Li #include <pmmintrin.h>
41*67e74705SXin Li #endif
42*67e74705SXin Li
43*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
44*67e74705SXin Li #include <tmmintrin.h>
45*67e74705SXin Li #endif
46*67e74705SXin Li
47*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || \
48*67e74705SXin Li (defined(__SSE4_2__) || defined(__SSE4_1__))
49*67e74705SXin Li #include <smmintrin.h>
50*67e74705SXin Li #endif
51*67e74705SXin Li
52*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || \
53*67e74705SXin Li (defined(__AES__) || defined(__PCLMUL__))
54*67e74705SXin Li #include <wmmintrin.h>
55*67e74705SXin Li #endif
56*67e74705SXin Li
57*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
58*67e74705SXin Li #include <clflushoptintrin.h>
59*67e74705SXin Li #endif
60*67e74705SXin Li
61*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
62*67e74705SXin Li #include <avxintrin.h>
63*67e74705SXin Li #endif
64*67e74705SXin Li
65*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
66*67e74705SXin Li #include <avx2intrin.h>
67*67e74705SXin Li
68*67e74705SXin Li /* The 256-bit versions of functions in f16cintrin.h.
69*67e74705SXin Li Intel documents these as being in immintrin.h, and
70*67e74705SXin Li they depend on typedefs from avxintrin.h. */
71*67e74705SXin Li
/* 256-bit single-precision to half-precision conversion.
 *
 * a   - value converted to __m256 and reinterpreted as __v8sf before being
 *       handed to __builtin_ia32_vcvtps2ph256.
 * imm - forwarded unchanged as the builtin's second operand (presumably the
 *       rounding control of VCVTPS2PH; it is kept as a macro argument so it
 *       reaches the builtin as written by the caller).
 *
 * The expression evaluates to the builtin's result cast to __m128i. */
#define _mm256_cvtps_ph(a, imm) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); \
  })
74*67e74705SXin Li
75*67e74705SXin Li static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
_mm256_cvtph_ps(__m128i __a)76*67e74705SXin Li _mm256_cvtph_ps(__m128i __a)
77*67e74705SXin Li {
78*67e74705SXin Li return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
79*67e74705SXin Li }
80*67e74705SXin Li #endif /* __AVX2__ */
81*67e74705SXin Li
82*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
83*67e74705SXin Li #include <bmiintrin.h>
84*67e74705SXin Li #endif
85*67e74705SXin Li
86*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
87*67e74705SXin Li #include <bmi2intrin.h>
88*67e74705SXin Li #endif
89*67e74705SXin Li
90*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
91*67e74705SXin Li #include <lzcntintrin.h>
92*67e74705SXin Li #endif
93*67e74705SXin Li
94*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
95*67e74705SXin Li #include <fmaintrin.h>
96*67e74705SXin Li #endif
97*67e74705SXin Li
98*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
99*67e74705SXin Li #include <avx512fintrin.h>
100*67e74705SXin Li #endif
101*67e74705SXin Li
102*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
103*67e74705SXin Li #include <avx512vlintrin.h>
104*67e74705SXin Li #endif
105*67e74705SXin Li
106*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
107*67e74705SXin Li #include <avx512bwintrin.h>
108*67e74705SXin Li #endif
109*67e74705SXin Li
110*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
111*67e74705SXin Li #include <avx512cdintrin.h>
112*67e74705SXin Li #endif
113*67e74705SXin Li
114*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
115*67e74705SXin Li #include <avx512dqintrin.h>
116*67e74705SXin Li #endif
117*67e74705SXin Li
118*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || \
119*67e74705SXin Li (defined(__AVX512VL__) && defined(__AVX512BW__))
120*67e74705SXin Li #include <avx512vlbwintrin.h>
121*67e74705SXin Li #endif
122*67e74705SXin Li
123*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || \
124*67e74705SXin Li (defined(__AVX512VL__) && defined(__AVX512CD__))
125*67e74705SXin Li #include <avx512vlcdintrin.h>
126*67e74705SXin Li #endif
127*67e74705SXin Li
128*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || \
129*67e74705SXin Li (defined(__AVX512VL__) && defined(__AVX512DQ__))
130*67e74705SXin Li #include <avx512vldqintrin.h>
131*67e74705SXin Li #endif
132*67e74705SXin Li
133*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
134*67e74705SXin Li #include <avx512erintrin.h>
135*67e74705SXin Li #endif
136*67e74705SXin Li
137*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
138*67e74705SXin Li #include <avx512ifmaintrin.h>
139*67e74705SXin Li #endif
140*67e74705SXin Li
141*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || \
142*67e74705SXin Li (defined(__AVX512IFMA__) && defined(__AVX512VL__))
143*67e74705SXin Li #include <avx512ifmavlintrin.h>
144*67e74705SXin Li #endif
145*67e74705SXin Li
146*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
147*67e74705SXin Li #include <avx512vbmiintrin.h>
148*67e74705SXin Li #endif
149*67e74705SXin Li
150*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || \
151*67e74705SXin Li (defined(__AVX512VBMI__) && defined(__AVX512VL__))
152*67e74705SXin Li #include <avx512vbmivlintrin.h>
153*67e74705SXin Li #endif
154*67e74705SXin Li
155*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
156*67e74705SXin Li #include <avx512pfintrin.h>
157*67e74705SXin Li #endif
158*67e74705SXin Li
159*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
160*67e74705SXin Li #include <pkuintrin.h>
161*67e74705SXin Li #endif
162*67e74705SXin Li
163*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/* 16-bit RDRAND step.
 *
 * Forwards __p to __builtin_ia32_rdrand16_step and returns the builtin's int
 * result unchanged. Per Intel's description of RDRAND the result is expected
 * to be 1 when a random value was stored through __p and 0 otherwise --
 * confirm against the intrinsics reference before relying on it. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  const int __status = __builtin_ia32_rdrand16_step(__p);
  return __status;
}
169*67e74705SXin Li
/* 32-bit RDRAND step.
 *
 * Forwards __p to __builtin_ia32_rdrand32_step and returns the builtin's int
 * result unchanged. Per Intel's description of RDRAND the result is expected
 * to be 1 when a random value was stored through __p and 0 otherwise --
 * confirm against the intrinsics reference before relying on it. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  const int __status = __builtin_ia32_rdrand32_step(__p);
  return __status;
}
175*67e74705SXin Li
/* _bit_scan_forward
 *
 * Returns the number of trailing zero bits in __A, i.e. the index of its
 * least significant set bit, as computed by __builtin_ctz.
 * __builtin_ctz leaves its result undefined for an input of 0, so callers
 * must pass a non-zero value. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A)
{
  const int __lowest_set = __builtin_ctz((unsigned int)__A);
  return __lowest_set;
}
181*67e74705SXin Li
/* _bit_scan_reverse
 *
 * Returns the index of the most significant set bit of __A, derived as
 * 31 minus the leading-zero count reported by __builtin_clz.
 * __builtin_clz leaves its result undefined for an input of 0, so callers
 * must pass a non-zero value. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A)
{
  const int __leading_zeros = __builtin_clz((unsigned int)__A);
  return 31 - __leading_zeros;
}
187*67e74705SXin Li
188*67e74705SXin Li #ifdef __x86_64__
/* 64-bit RDRAND step (only defined when __x86_64__ is set).
 *
 * Forwards __p to __builtin_ia32_rdrand64_step and returns the builtin's int
 * result unchanged. Per Intel's description of RDRAND the result is expected
 * to be 1 when a random value was stored through __p and 0 otherwise --
 * confirm against the intrinsics reference before relying on it. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  const int __status = __builtin_ia32_rdrand64_step(__p);
  return __status;
}
194*67e74705SXin Li #endif
195*67e74705SXin Li #endif /* __RDRND__ */
196*67e74705SXin Li
197*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
198*67e74705SXin Li #ifdef __x86_64__
/* Returns the unsigned int produced by __builtin_ia32_rdfsbase32()
 * (presumably the low 32 bits of the FS segment base, per RDFSBASE).
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  const unsigned int __base = __builtin_ia32_rdfsbase32();
  return __base;
}
204*67e74705SXin Li
/* Returns the unsigned long long produced by __builtin_ia32_rdfsbase64()
 * (presumably the full FS segment base, per RDFSBASE).
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  const unsigned long long __base = __builtin_ia32_rdfsbase64();
  return __base;
}
210*67e74705SXin Li
/* Returns the unsigned int produced by __builtin_ia32_rdgsbase32()
 * (presumably the low 32 bits of the GS segment base, per RDGSBASE).
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  const unsigned int __base = __builtin_ia32_rdgsbase32();
  return __base;
}
216*67e74705SXin Li
/* Returns the unsigned long long produced by __builtin_ia32_rdgsbase64()
 * (presumably the full GS segment base, per RDGSBASE).
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  const unsigned long long __base = __builtin_ia32_rdgsbase64();
  return __base;
}
222*67e74705SXin Li
/* Passes __V to __builtin_ia32_wrfsbase32 (presumably loading the FS segment
 * base from a 32-bit value, per WRFSBASE). Produces no result.
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  /* The builtin yields no value, so it is invoked as a plain statement:
   * ISO C forbids `return <expression>;` in a function returning void. */
  __builtin_ia32_wrfsbase32(__V);
}
228*67e74705SXin Li
/* Passes __V to __builtin_ia32_wrfsbase64 (presumably loading the FS segment
 * base from a 64-bit value, per WRFSBASE). Produces no result.
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  /* The builtin yields no value, so it is invoked as a plain statement:
   * ISO C forbids `return <expression>;` in a function returning void. */
  __builtin_ia32_wrfsbase64(__V);
}
234*67e74705SXin Li
/* Passes __V to __builtin_ia32_wrgsbase32 (presumably loading the GS segment
 * base from a 32-bit value, per WRGSBASE). Produces no result.
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  /* The builtin yields no value, so it is invoked as a plain statement:
   * ISO C forbids `return <expression>;` in a function returning void. */
  __builtin_ia32_wrgsbase32(__V);
}
240*67e74705SXin Li
/* Passes __V to __builtin_ia32_wrgsbase64 (presumably loading the GS segment
 * base from a 64-bit value, per WRGSBASE). Produces no result.
 * Compiled with the "fsgsbase" target feature enabled for this function. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  /* The builtin yields no value, so it is invoked as a plain statement:
   * ISO C forbids `return <expression>;` in a function returning void. */
  __builtin_ia32_wrgsbase64(__V);
}
246*67e74705SXin Li
247*67e74705SXin Li #endif
248*67e74705SXin Li #endif /* __FSGSBASE__ */
249*67e74705SXin Li
250*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
251*67e74705SXin Li #include <rtmintrin.h>
252*67e74705SXin Li #include <xtestintrin.h>
253*67e74705SXin Li #endif
254*67e74705SXin Li
255*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
256*67e74705SXin Li #include <shaintrin.h>
257*67e74705SXin Li #endif
258*67e74705SXin Li
259*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
260*67e74705SXin Li #include <fxsrintrin.h>
261*67e74705SXin Li #endif
262*67e74705SXin Li
263*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
264*67e74705SXin Li #include <xsaveintrin.h>
265*67e74705SXin Li #endif
266*67e74705SXin Li
267*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
268*67e74705SXin Li #include <xsaveoptintrin.h>
269*67e74705SXin Li #endif
270*67e74705SXin Li
271*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
272*67e74705SXin Li #include <xsavecintrin.h>
273*67e74705SXin Li #endif
274*67e74705SXin Li
275*67e74705SXin Li #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
276*67e74705SXin Li #include <xsavesintrin.h>
277*67e74705SXin Li #endif
278*67e74705SXin Li
279*67e74705SXin Li /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
280*67e74705SXin Li * whereas others are also available at all times. */
281*67e74705SXin Li #include <adxintrin.h>
282*67e74705SXin Li
283*67e74705SXin Li #endif /* __IMMINTRIN_H */
284