1*67e74705SXin Li /*===------------- avx512vbmiintrin.h - VBMI intrinsics ------------------===
2*67e74705SXin Li *
3*67e74705SXin Li *
4*67e74705SXin Li * Permission is hereby granted, free of charge, to any person obtaining a copy
5*67e74705SXin Li * of this software and associated documentation files (the "Software"), to deal
6*67e74705SXin Li * in the Software without restriction, including without limitation the rights
7*67e74705SXin Li * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8*67e74705SXin Li * copies of the Software, and to permit persons to whom the Software is
9*67e74705SXin Li * furnished to do so, subject to the following conditions:
10*67e74705SXin Li *
11*67e74705SXin Li * The above copyright notice and this permission notice shall be included in
12*67e74705SXin Li * all copies or substantial portions of the Software.
13*67e74705SXin Li *
14*67e74705SXin Li * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15*67e74705SXin Li * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16*67e74705SXin Li * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17*67e74705SXin Li * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18*67e74705SXin Li * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19*67e74705SXin Li * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20*67e74705SXin Li * THE SOFTWARE.
21*67e74705SXin Li *
22*67e74705SXin Li *===-----------------------------------------------------------------------===
23*67e74705SXin Li */
24*67e74705SXin Li #ifndef __IMMINTRIN_H
25*67e74705SXin Li #error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
26*67e74705SXin Li #endif
27*67e74705SXin Li
28*67e74705SXin Li #ifndef __VBMIINTRIN_H
29*67e74705SXin Li #define __VBMIINTRIN_H
30*67e74705SXin Li
/* Attribute list attached to every intrinsic defined in this header:
 * __always_inline__, __nodebug__, and a __target__ of "avx512vbmi".
 * The macro is removed again with #undef at the end of the file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi")))
33*67e74705SXin Li
34*67e74705SXin Li
35*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi8(__m512i __A,__m512i __I,__mmask64 __U,__m512i __B)36*67e74705SXin Li _mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
37*67e74705SXin Li __mmask64 __U, __m512i __B)
38*67e74705SXin Li {
39*67e74705SXin Li return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
40*67e74705SXin Li (__v64qi) __I
41*67e74705SXin Li /* idx */ ,
42*67e74705SXin Li (__v64qi) __B,
43*67e74705SXin Li (__mmask64) __U);
44*67e74705SXin Li }
45*67e74705SXin Li
46*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi8(__m512i __A,__m512i __I,__m512i __B)47*67e74705SXin Li _mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
48*67e74705SXin Li {
49*67e74705SXin Li return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
50*67e74705SXin Li /* idx */ ,
51*67e74705SXin Li (__v64qi) __A,
52*67e74705SXin Li (__v64qi) __B,
53*67e74705SXin Li (__mmask64) -1);
54*67e74705SXin Li }
55*67e74705SXin Li
56*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi8(__m512i __A,__mmask64 __U,__m512i __I,__m512i __B)57*67e74705SXin Li _mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
58*67e74705SXin Li __m512i __I, __m512i __B)
59*67e74705SXin Li {
60*67e74705SXin Li return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
61*67e74705SXin Li /* idx */ ,
62*67e74705SXin Li (__v64qi) __A,
63*67e74705SXin Li (__v64qi) __B,
64*67e74705SXin Li (__mmask64) __U);
65*67e74705SXin Li }
66*67e74705SXin Li
67*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi8(__mmask64 __U,__m512i __A,__m512i __I,__m512i __B)68*67e74705SXin Li _mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
69*67e74705SXin Li __m512i __I, __m512i __B)
70*67e74705SXin Li {
71*67e74705SXin Li return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
72*67e74705SXin Li /* idx */ ,
73*67e74705SXin Li (__v64qi) __A,
74*67e74705SXin Li (__v64qi) __B,
75*67e74705SXin Li (__mmask64) __U);
76*67e74705SXin Li }
77*67e74705SXin Li
78*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi8(__m512i __A,__m512i __B)79*67e74705SXin Li _mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
80*67e74705SXin Li {
81*67e74705SXin Li return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
82*67e74705SXin Li (__v64qi) __A,
83*67e74705SXin Li (__v64qi) _mm512_undefined_epi32 (),
84*67e74705SXin Li (__mmask64) -1);
85*67e74705SXin Li }
86*67e74705SXin Li
87*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi8(__mmask64 __M,__m512i __A,__m512i __B)88*67e74705SXin Li _mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
89*67e74705SXin Li __m512i __B)
90*67e74705SXin Li {
91*67e74705SXin Li return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
92*67e74705SXin Li (__v64qi) __A,
93*67e74705SXin Li (__v64qi) _mm512_setzero_si512(),
94*67e74705SXin Li (__mmask64) __M);
95*67e74705SXin Li }
96*67e74705SXin Li
97*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi8(__m512i __W,__mmask64 __M,__m512i __A,__m512i __B)98*67e74705SXin Li _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
99*67e74705SXin Li __m512i __B)
100*67e74705SXin Li {
101*67e74705SXin Li return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
102*67e74705SXin Li (__v64qi) __A,
103*67e74705SXin Li (__v64qi) __W,
104*67e74705SXin Li (__mmask64) __M);
105*67e74705SXin Li }
106*67e74705SXin Li
107*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_multishift_epi64_epi8(__m512i __W,__mmask64 __M,__m512i __X,__m512i __Y)108*67e74705SXin Li _mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
109*67e74705SXin Li {
110*67e74705SXin Li return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
111*67e74705SXin Li (__v64qi) __Y,
112*67e74705SXin Li (__v64qi) __W,
113*67e74705SXin Li (__mmask64) __M);
114*67e74705SXin Li }
115*67e74705SXin Li
116*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_multishift_epi64_epi8(__mmask64 __M,__m512i __X,__m512i __Y)117*67e74705SXin Li _mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
118*67e74705SXin Li {
119*67e74705SXin Li return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
120*67e74705SXin Li (__v64qi) __Y,
121*67e74705SXin Li (__v64qi) _mm512_setzero_si512 (),
122*67e74705SXin Li (__mmask64) __M);
123*67e74705SXin Li }
124*67e74705SXin Li
125*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_multishift_epi64_epi8(__m512i __X,__m512i __Y)126*67e74705SXin Li _mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
127*67e74705SXin Li {
128*67e74705SXin Li return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
129*67e74705SXin Li (__v64qi) __Y,
130*67e74705SXin Li (__v64qi) _mm512_undefined_epi32 (),
131*67e74705SXin Li (__mmask64) -1);
132*67e74705SXin Li }
133*67e74705SXin Li
134*67e74705SXin Li
135*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
136*67e74705SXin Li
137*67e74705SXin Li #endif
138