/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24*67e74705SXin Li #ifndef __IMMINTRIN_H
25*67e74705SXin Li #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
26*67e74705SXin Li #endif
27*67e74705SXin Li 
28*67e74705SXin Li #ifndef __FMAINTRIN_H
29*67e74705SXin Li #define __FMAINTRIN_H
30*67e74705SXin Li 
31*67e74705SXin Li /* Define the default attributes for the functions in this file. */
32*67e74705SXin Li #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
33*67e74705SXin Li 
34*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmadd_ps(__m128 __A,__m128 __B,__m128 __C)35*67e74705SXin Li _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
36*67e74705SXin Li {
37*67e74705SXin Li   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
38*67e74705SXin Li }
39*67e74705SXin Li 
40*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmadd_pd(__m128d __A,__m128d __B,__m128d __C)41*67e74705SXin Li _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
42*67e74705SXin Li {
43*67e74705SXin Li   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
44*67e74705SXin Li }
45*67e74705SXin Li 
46*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmadd_ss(__m128 __A,__m128 __B,__m128 __C)47*67e74705SXin Li _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
48*67e74705SXin Li {
49*67e74705SXin Li   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
50*67e74705SXin Li }
51*67e74705SXin Li 
52*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmadd_sd(__m128d __A,__m128d __B,__m128d __C)53*67e74705SXin Li _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
54*67e74705SXin Li {
55*67e74705SXin Li   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
56*67e74705SXin Li }
57*67e74705SXin Li 
58*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ps(__m128 __A,__m128 __B,__m128 __C)59*67e74705SXin Li _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
60*67e74705SXin Li {
61*67e74705SXin Li   return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
62*67e74705SXin Li }
63*67e74705SXin Li 
64*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_pd(__m128d __A,__m128d __B,__m128d __C)65*67e74705SXin Li _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
66*67e74705SXin Li {
67*67e74705SXin Li   return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
68*67e74705SXin Li }
69*67e74705SXin Li 
70*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ss(__m128 __A,__m128 __B,__m128 __C)71*67e74705SXin Li _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
72*67e74705SXin Li {
73*67e74705SXin Li   return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
74*67e74705SXin Li }
75*67e74705SXin Li 
76*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_sd(__m128d __A,__m128d __B,__m128d __C)77*67e74705SXin Li _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
78*67e74705SXin Li {
79*67e74705SXin Li   return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
80*67e74705SXin Li }
81*67e74705SXin Li 
82*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ps(__m128 __A,__m128 __B,__m128 __C)83*67e74705SXin Li _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
84*67e74705SXin Li {
85*67e74705SXin Li   return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
86*67e74705SXin Li }
87*67e74705SXin Li 
88*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_pd(__m128d __A,__m128d __B,__m128d __C)89*67e74705SXin Li _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
90*67e74705SXin Li {
91*67e74705SXin Li   return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
92*67e74705SXin Li }
93*67e74705SXin Li 
94*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ss(__m128 __A,__m128 __B,__m128 __C)95*67e74705SXin Li _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
96*67e74705SXin Li {
97*67e74705SXin Li   return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
98*67e74705SXin Li }
99*67e74705SXin Li 
100*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_sd(__m128d __A,__m128d __B,__m128d __C)101*67e74705SXin Li _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
102*67e74705SXin Li {
103*67e74705SXin Li   return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
104*67e74705SXin Li }
105*67e74705SXin Li 
106*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ps(__m128 __A,__m128 __B,__m128 __C)107*67e74705SXin Li _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
108*67e74705SXin Li {
109*67e74705SXin Li   return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
110*67e74705SXin Li }
111*67e74705SXin Li 
112*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_pd(__m128d __A,__m128d __B,__m128d __C)113*67e74705SXin Li _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
114*67e74705SXin Li {
115*67e74705SXin Li   return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
116*67e74705SXin Li }
117*67e74705SXin Li 
118*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ss(__m128 __A,__m128 __B,__m128 __C)119*67e74705SXin Li _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
120*67e74705SXin Li {
121*67e74705SXin Li   return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
122*67e74705SXin Li }
123*67e74705SXin Li 
124*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_sd(__m128d __A,__m128d __B,__m128d __C)125*67e74705SXin Li _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
126*67e74705SXin Li {
127*67e74705SXin Li   return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
128*67e74705SXin Li }
129*67e74705SXin Li 
130*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmaddsub_ps(__m128 __A,__m128 __B,__m128 __C)131*67e74705SXin Li _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
132*67e74705SXin Li {
133*67e74705SXin Li   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
134*67e74705SXin Li }
135*67e74705SXin Li 
136*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmaddsub_pd(__m128d __A,__m128d __B,__m128d __C)137*67e74705SXin Li _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
138*67e74705SXin Li {
139*67e74705SXin Li   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
140*67e74705SXin Li }
141*67e74705SXin Li 
142*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsubadd_ps(__m128 __A,__m128 __B,__m128 __C)143*67e74705SXin Li _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
144*67e74705SXin Li {
145*67e74705SXin Li   return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
146*67e74705SXin Li }
147*67e74705SXin Li 
148*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsubadd_pd(__m128d __A,__m128d __B,__m128d __C)149*67e74705SXin Li _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
150*67e74705SXin Li {
151*67e74705SXin Li   return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
152*67e74705SXin Li }
153*67e74705SXin Li 
154*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmadd_ps(__m256 __A,__m256 __B,__m256 __C)155*67e74705SXin Li _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
156*67e74705SXin Li {
157*67e74705SXin Li   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
158*67e74705SXin Li }
159*67e74705SXin Li 
160*67e74705SXin Li static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmadd_pd(__m256d __A,__m256d __B,__m256d __C)161*67e74705SXin Li _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
162*67e74705SXin Li {
163*67e74705SXin Li   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
164*67e74705SXin Li }
165*67e74705SXin Li 
166*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsub_ps(__m256 __A,__m256 __B,__m256 __C)167*67e74705SXin Li _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
168*67e74705SXin Li {
169*67e74705SXin Li   return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
170*67e74705SXin Li }
171*67e74705SXin Li 
172*67e74705SXin Li static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsub_pd(__m256d __A,__m256d __B,__m256d __C)173*67e74705SXin Li _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
174*67e74705SXin Li {
175*67e74705SXin Li   return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
176*67e74705SXin Li }
177*67e74705SXin Li 
178*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmadd_ps(__m256 __A,__m256 __B,__m256 __C)179*67e74705SXin Li _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
180*67e74705SXin Li {
181*67e74705SXin Li   return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
182*67e74705SXin Li }
183*67e74705SXin Li 
184*67e74705SXin Li static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmadd_pd(__m256d __A,__m256d __B,__m256d __C)185*67e74705SXin Li _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
186*67e74705SXin Li {
187*67e74705SXin Li   return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
188*67e74705SXin Li }
189*67e74705SXin Li 
190*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmsub_ps(__m256 __A,__m256 __B,__m256 __C)191*67e74705SXin Li _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
192*67e74705SXin Li {
193*67e74705SXin Li   return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
194*67e74705SXin Li }
195*67e74705SXin Li 
196*67e74705SXin Li static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmsub_pd(__m256d __A,__m256d __B,__m256d __C)197*67e74705SXin Li _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
198*67e74705SXin Li {
199*67e74705SXin Li   return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
200*67e74705SXin Li }
201*67e74705SXin Li 
202*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmaddsub_ps(__m256 __A,__m256 __B,__m256 __C)203*67e74705SXin Li _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
204*67e74705SXin Li {
205*67e74705SXin Li   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
206*67e74705SXin Li }
207*67e74705SXin Li 
208*67e74705SXin Li static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmaddsub_pd(__m256d __A,__m256d __B,__m256d __C)209*67e74705SXin Li _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
210*67e74705SXin Li {
211*67e74705SXin Li   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
212*67e74705SXin Li }
213*67e74705SXin Li 
214*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsubadd_ps(__m256 __A,__m256 __B,__m256 __C)215*67e74705SXin Li _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
216*67e74705SXin Li {
217*67e74705SXin Li   return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
218*67e74705SXin Li }
219*67e74705SXin Li 
220*67e74705SXin Li static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsubadd_pd(__m256d __A,__m256d __B,__m256d __C)221*67e74705SXin Li _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
222*67e74705SXin Li {
223*67e74705SXin Li   return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
224*67e74705SXin Li }
225*67e74705SXin Li 
226*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
227*67e74705SXin Li 
228*67e74705SXin Li #endif /* __FMAINTRIN_H */
229