1*67e74705SXin Li /*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
2*67e74705SXin Li *
3*67e74705SXin Li * Permission is hereby granted, free of charge, to any person obtaining a copy
4*67e74705SXin Li * of this software and associated documentation files (the "Software"), to deal
5*67e74705SXin Li * in the Software without restriction, including without limitation the rights
6*67e74705SXin Li * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7*67e74705SXin Li * copies of the Software, and to permit persons to whom the Software is
8*67e74705SXin Li * furnished to do so, subject to the following conditions:
9*67e74705SXin Li *
10*67e74705SXin Li * The above copyright notice and this permission notice shall be included in
11*67e74705SXin Li * all copies or substantial portions of the Software.
12*67e74705SXin Li *
13*67e74705SXin Li * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14*67e74705SXin Li * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15*67e74705SXin Li * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16*67e74705SXin Li * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17*67e74705SXin Li * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18*67e74705SXin Li * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19*67e74705SXin Li * THE SOFTWARE.
20*67e74705SXin Li *
21*67e74705SXin Li *===-----------------------------------------------------------------------===
22*67e74705SXin Li */
23*67e74705SXin Li
24*67e74705SXin Li #ifndef __ARM_ACLE_H
25*67e74705SXin Li #define __ARM_ACLE_H
26*67e74705SXin Li
27*67e74705SXin Li #ifndef __ARM_ACLE
28*67e74705SXin Li #error "ACLE intrinsics support not enabled."
29*67e74705SXin Li #endif
30*67e74705SXin Li
31*67e74705SXin Li #include <stdint.h>
32*67e74705SXin Li
33*67e74705SXin Li #if defined(__cplusplus)
34*67e74705SXin Li extern "C" {
35*67e74705SXin Li #endif
36*67e74705SXin Li
37*67e74705SXin Li /* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
38*67e74705SXin Li /* 8.3 Memory barriers */
/* Barrier macros: each one hands its single argument, unchanged, to the
 * builtin of the same name.  They are only defined when _MSC_VER is not
 * defined. */
#if !defined(_MSC_VER)
#define __dmb(option) __builtin_arm_dmb(option)
#define __dsb(option) __builtin_arm_dsb(option)
#define __isb(option) __builtin_arm_isb(option)
#endif
44*67e74705SXin Li
45*67e74705SXin Li /* 8.4 Hints */
46*67e74705SXin Li
/* Hint wrappers.  Every function below takes no arguments, returns nothing
 * and consists of a single call to the __builtin_arm_* builtin of the same
 * name.  The whole group is only defined when _MSC_VER is not defined. */
#if !defined(_MSC_VER)
/* Calls __builtin_arm_wfi(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__wfi(void) {
  __builtin_arm_wfi();
}

/* Calls __builtin_arm_wfe(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__wfe(void) {
  __builtin_arm_wfe();
}

/* Calls __builtin_arm_sev(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__sev(void) {
  __builtin_arm_sev();
}

/* Calls __builtin_arm_sevl(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__sevl(void) {
  __builtin_arm_sevl();
}

/* Calls __builtin_arm_yield(). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__yield(void) {
  __builtin_arm_yield();
}
#endif
68*67e74705SXin Li
/* __dbg forwards its argument to __builtin_arm_dbg.  It is only provided
 * when __ARM_32BIT_STATE is non-zero. */
#if __ARM_32BIT_STATE
#define __dbg(option) __builtin_arm_dbg(option)
#endif
72*67e74705SXin Li
73*67e74705SXin Li /* 8.5 Swap */
/* Swap: writes __x to *__p and returns the value that was read from *__p.
 *
 * The value is read with __builtin_arm_ldrex and written with
 * __builtin_arm_strex; the load/store pair is repeated for as long as
 * __builtin_arm_strex returns non-zero.  The value returned is the one
 * obtained by the final load. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t __previous;
  do {
    __previous = __builtin_arm_ldrex(__p);
  } while (__builtin_arm_strex(__x, __p));
  return __previous;
}
82*67e74705SXin Li
83*67e74705SXin Li /* 8.6 Memory prefetch intrinsics */
84*67e74705SXin Li /* 8.6.1 Data prefetch */
/* __pldx(kind, level, policy, ptr) maps onto __builtin_arm_prefetch with a
 * trailing argument of 1.  When __ARM_32BIT_STATE is non-zero only the
 * address and the access kind are forwarded; the cache level and retention
 * policy are accepted but dropped.  Otherwise all four are forwarded. */
#if __ARM_32BIT_STATE
#define __pldx(kind, level, policy, ptr) \
  __builtin_arm_prefetch(ptr, kind, 1)
#else
#define __pldx(kind, level, policy, ptr) \
  __builtin_arm_prefetch(ptr, kind, level, policy, 1)
#endif

/* __pld(ptr) is __pldx with access kind, cache level and retention policy
 * all set to 0. */
#define __pld(ptr) __pldx(0, 0, 0, ptr)
94*67e74705SXin Li
95*67e74705SXin Li /* 8.6.2 Instruction prefetch */
/* __plix(level, policy, ptr) maps onto __builtin_arm_prefetch with a
 * trailing argument of 0.  When __ARM_32BIT_STATE is non-zero the call is
 * (ptr, 0, 0) and the cache level and retention policy are dropped;
 * otherwise the call is (ptr, 0, level, policy, 0). */
#if __ARM_32BIT_STATE
#define __plix(level, policy, ptr) \
  __builtin_arm_prefetch(ptr, 0, 0)
#else
#define __plix(level, policy, ptr) \
  __builtin_arm_prefetch(ptr, 0, level, policy, 0)
#endif

/* __pli(ptr) is __plix with cache level and retention policy set to 0. */
#define __pli(ptr) __plix(0, 0, ptr)
105*67e74705SXin Li
106*67e74705SXin Li /* 8.7 NOP */
/* Calls __builtin_arm_nop(); takes no arguments and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__nop(void) {
  __builtin_arm_nop();
}
110*67e74705SXin Li
111*67e74705SXin Li /* 9 DATA-PROCESSING INTRINSICS */
112*67e74705SXin Li /* 9.2 Miscellaneous data-processing intrinsics */
113*67e74705SXin Li /* ROR */
/* Rotates the 32-bit value __x right by __y bit positions.
 *
 * The rotation count is reduced modulo 32.  A reduced count of zero returns
 * __x untouched, which also keeps the left shift below from ever being a
 * shift by the full width of the type. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  const uint32_t __n = __y & 31u;
  return __n ? ((__x >> __n) | (__x << (32u - __n))) : __x;
}
121*67e74705SXin Li
/* Rotates the 64-bit value __x right by __y bit positions.
 *
 * The rotation count is reduced modulo 64.  A reduced count of zero returns
 * __x untouched, which also keeps the left shift below from ever being a
 * shift by the full width of the type. */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  const uint32_t __n = __y & 63u;
  return __n ? ((__x >> __n) | (__x << (64u - __n))) : __x;
}
129*67e74705SXin Li
/* Rotates the unsigned long value __x right by __y bit positions.
 *
 * Dispatches on __SIZEOF_LONG__: a 4-byte long is rotated by __ror, any
 * other size by __rorll. */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ != 4
  return __rorll(__x, __y);
#else
  return __ror(__x, __y);
#endif
}
138*67e74705SXin Li
139*67e74705SXin Li
140*67e74705SXin Li /* CLZ */
/* Returns the number of leading zero bits in the 32-bit value __t.
 *
 * A zero operand yields 32 (the operand width), as ACLE specifies.  It is
 * handled explicitly because the result of __builtin_clz is undefined when
 * its argument is 0. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  if (__t == 0)
    return 32;
  return __builtin_clz(__t);
}
145*67e74705SXin Li
/* Returns the number of leading zero bits in the unsigned long value __t.
 *
 * A zero operand yields the width of unsigned long in bits, as ACLE
 * specifies.  It is handled explicitly because the result of
 * __builtin_clzl is undefined when its argument is 0. */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  if (__t == 0)
    return __SIZEOF_LONG__ * 8;
  return __builtin_clzl(__t);
}
150*67e74705SXin Li
/* Returns the number of leading zero bits in the 64-bit value __t.
 *
 * A zero operand yields 64 (the operand width), as ACLE specifies.  It is
 * handled explicitly because the result of __builtin_clzll is undefined
 * when its argument is 0. */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  if (__t == 0)
    return 64;
  return __builtin_clzll(__t);
}
155*67e74705SXin Li
156*67e74705SXin Li /* REV */
/* Returns __t with the order of its four bytes reversed
 * (via __builtin_bswap32). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  const uint32_t __swapped = __builtin_bswap32(__t);
  return __swapped;
}
161*67e74705SXin Li
/* Returns __t with the order of its bytes reversed.
 *
 * Dispatches on __SIZEOF_LONG__: a 4-byte long uses __builtin_bswap32, any
 * other size uses __builtin_bswap64. */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ != 4
  return __builtin_bswap64(__t);
#else
  return __builtin_bswap32(__t);
#endif
}
170*67e74705SXin Li
/* Returns __t with the order of its eight bytes reversed
 * (via __builtin_bswap64). */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  const uint64_t __swapped = __builtin_bswap64(__t);
  return __swapped;
}
175*67e74705SXin Li
176*67e74705SXin Li /* REV16 */
/* Returns __t with the two bytes inside each 16-bit halfword exchanged.
 *
 * Computed as a full 32-bit byte reversal followed by a rotation by 16
 * bits: the reversal swaps the bytes but also swaps the halfwords, and the
 * rotation puts the halfwords back in their original positions. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  const uint32_t __swapped = __builtin_bswap32(__t);
  return (__swapped >> 16) | (__swapped << 16);
}
181*67e74705SXin Li
/* Returns __t with the two bytes inside each of its four 16-bit halfwords
 * exchanged.
 *
 * The upper byte of every halfword is moved down by 8 bits and the lower
 * byte of every halfword is moved up by 8 bits; the two halves are then
 * merged. */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  const uint64_t __upper_bytes = __t & 0xff00ff00ff00ff00ULL;
  const uint64_t __lower_bytes = __t & 0x00ff00ff00ff00ffULL;
  return (__upper_bytes >> 8) | (__lower_bytes << 8);
}
186*67e74705SXin Li
/* Returns __t with the two bytes inside each 16-bit halfword exchanged.
 *
 * Dispatches on __SIZEOF_LONG__: a 4-byte long is handled by __rev16, any
 * other size by __rev16ll. */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ != 4
  return __rev16ll(__t);
#else
  return __rev16(__t);
#endif
}
195*67e74705SXin Li
196*67e74705SXin Li /* REVSH */
/* Returns the 16-bit value __t with its two bytes exchanged
 * (via __builtin_bswap16); the swapped bit pattern is converted back to
 * int16_t on return. */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  const uint16_t __swapped = __builtin_bswap16(__t);
  return __swapped;
}
201*67e74705SXin Li
202*67e74705SXin Li /* RBIT */
/* Returns the result of __builtin_arm_rbit applied to the 32-bit value
 * __t. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  const uint32_t __reversed = __builtin_arm_rbit(__t);
  return __reversed;
}
207*67e74705SXin Li
/* 64-bit counterpart of __rbit.
 *
 * When __ARM_32BIT_STATE is non-zero the result is assembled from two
 * __builtin_arm_rbit calls: the result for the low 32 bits of __t becomes
 * the high half of the return value and the result for the high 32 bits
 * becomes the low half.  Otherwise __builtin_arm_rbit64 is used directly. */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if __ARM_32BIT_STATE
  const uint64_t __high_half = ((uint64_t)__builtin_arm_rbit(__t)) << 32;
  const uint32_t __low_half = __builtin_arm_rbit(__t >> 32);
  return __high_half | __low_half;
#else
  return __builtin_arm_rbit64(__t);
#endif
}
217*67e74705SXin Li
/* unsigned long counterpart of __rbit.
 *
 * Dispatches on __SIZEOF_LONG__: a 4-byte long is handled by __rbit, any
 * other size by __rbitll. */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ != 4
  return __rbitll(__t);
#else
  return __rbit(__t);
#endif
}
226*67e74705SXin Li
/*
 * 9.4 Saturating intrinsics
 *
 * FIXME: Change the guards to their corresponding __ARM_FEATURE flags when
 * the Q flag intrinsics are implemented and the flags are enabled.
 */
233*67e74705SXin Li /* 9.4.1 Width-specified saturation intrinsics */
/* __ssat and __usat forward both of their arguments, in order, to
 * __builtin_arm_ssat and __builtin_arm_usat respectively.  They are only
 * provided when __ARM_32BIT_STATE is non-zero. */
#if __ARM_32BIT_STATE
#define __ssat(value, width) __builtin_arm_ssat(value, width)
#define __usat(value, width) __builtin_arm_usat(value, width)
#endif
238*67e74705SXin Li
239*67e74705SXin Li /* 9.4.2 Saturating addition and subtraction intrinsics */
/* Wrappers around the __builtin_arm_qadd / __builtin_arm_qsub builtins.
 * The group is only provided when __ARM_32BIT_STATE is non-zero. */
#if __ARM_32BIT_STATE
/* Returns __builtin_arm_qadd(__t, __v). */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  const int32_t __sum = __builtin_arm_qadd(__t, __v);
  return __sum;
}

/* Returns __builtin_arm_qsub(__t, __v). */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  const int32_t __difference = __builtin_arm_qsub(__t, __v);
  return __difference;
}

/* Returns __builtin_arm_qadd(__t, __t), i.e. __t added to itself through
 * the same builtin that __qadd uses. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  const int32_t __doubled = __builtin_arm_qadd(__t, __t);
  return __doubled;
}
#endif
256*67e74705SXin Li
257*67e74705SXin Li /* 9.7 CRC32 intrinsics */
/* CRC32 wrappers.  Each function takes a 32-bit value __a and a data
 * operand __b, and returns the result of the __builtin_arm_crc32* builtin
 * of the same name.  The suffix selects the type of __b:
 * b = uint8_t, h = uint16_t, w = uint32_t, d = uint64_t.
 * The group is only provided when __ARM_FEATURE_CRC32 is non-zero. */
#if __ARM_FEATURE_CRC32
/* Returns __builtin_arm_crc32b(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t __a, uint8_t __b) {
  const uint32_t __crc = __builtin_arm_crc32b(__a, __b);
  return __crc;
}

/* Returns __builtin_arm_crc32h(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t __a, uint16_t __b) {
  const uint32_t __crc = __builtin_arm_crc32h(__a, __b);
  return __crc;
}

/* Returns __builtin_arm_crc32w(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t __a, uint32_t __b) {
  const uint32_t __crc = __builtin_arm_crc32w(__a, __b);
  return __crc;
}

/* Returns __builtin_arm_crc32d(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t __a, uint64_t __b) {
  const uint32_t __crc = __builtin_arm_crc32d(__a, __b);
  return __crc;
}

/* Returns __builtin_arm_crc32cb(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t __a, uint8_t __b) {
  const uint32_t __crc = __builtin_arm_crc32cb(__a, __b);
  return __crc;
}

/* Returns __builtin_arm_crc32ch(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t __a, uint16_t __b) {
  const uint32_t __crc = __builtin_arm_crc32ch(__a, __b);
  return __crc;
}

/* Returns __builtin_arm_crc32cw(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t __a, uint32_t __b) {
  const uint32_t __crc = __builtin_arm_crc32cw(__a, __b);
  return __crc;
}

/* Returns __builtin_arm_crc32cd(__a, __b). */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t __a, uint64_t __b) {
  const uint32_t __crc = __builtin_arm_crc32cd(__a, __b);
  return __crc;
}
#endif
299*67e74705SXin Li
300*67e74705SXin Li /* 10.1 Special register intrinsics */
/* Special register access macros.  Each one forwards its arguments,
 * unchanged and in order, to the builtin named __builtin_<macro name>.
 * The __arm_rsr* macros take the register only; the __arm_wsr* macros take
 * the register followed by a value. */
#define __arm_rsr(reg) __builtin_arm_rsr(reg)
#define __arm_rsr64(reg) __builtin_arm_rsr64(reg)
#define __arm_rsrp(reg) __builtin_arm_rsrp(reg)
#define __arm_wsr(reg, value) __builtin_arm_wsr(reg, value)
#define __arm_wsr64(reg, value) __builtin_arm_wsr64(reg, value)
#define __arm_wsrp(reg, value) __builtin_arm_wsrp(reg, value)
307*67e74705SXin Li
308*67e74705SXin Li #if defined(__cplusplus)
309*67e74705SXin Li }
310*67e74705SXin Li #endif
311*67e74705SXin Li
312*67e74705SXin Li #endif /* __ARM_ACLE_H */
313