xref: /aosp_15_r20/external/webp/src/dsp/cpu.h (revision b2055c353e87c8814eb2b6b1b11112a1562253bd)
1*b2055c35SXin Li // Copyright 2022 Google Inc. All Rights Reserved.
2*b2055c35SXin Li //
3*b2055c35SXin Li // Use of this source code is governed by a BSD-style license
4*b2055c35SXin Li // that can be found in the COPYING file in the root of the source
5*b2055c35SXin Li // tree. An additional intellectual property rights grant can be found
6*b2055c35SXin Li // in the file PATENTS. All contributing project authors may
7*b2055c35SXin Li // be found in the AUTHORS file in the root of the source tree.
8*b2055c35SXin Li // -----------------------------------------------------------------------------
9*b2055c35SXin Li //
10*b2055c35SXin Li //   CPU detection functions and macros.
11*b2055c35SXin Li //
12*b2055c35SXin Li // Author: Skal ([email protected])
13*b2055c35SXin Li 
14*b2055c35SXin Li #ifndef WEBP_DSP_CPU_H_
15*b2055c35SXin Li #define WEBP_DSP_CPU_H_
16*b2055c35SXin Li 
17*b2055c35SXin Li #include <stddef.h>
18*b2055c35SXin Li 
19*b2055c35SXin Li #ifdef HAVE_CONFIG_H
20*b2055c35SXin Li #include "src/webp/config.h"
21*b2055c35SXin Li #endif
22*b2055c35SXin Li 
23*b2055c35SXin Li #include "src/webp/types.h"
24*b2055c35SXin Li 
// GCC version helpers.
//   LOCAL_GCC_VERSION          compiler version packed as (major << 8) | minor.
//   LOCAL_GCC_PREREQ(maj, min) non-zero when the compiler version is at least
//                              maj.min.
// Both evaluate to 0 when __GNUC__ is not defined, so they can be used
// unconditionally inside #if expressions.
#if defined(__GNUC__)
#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
#else
#define LOCAL_GCC_VERSION 0
#define LOCAL_GCC_PREREQ(maj, min) 0
#endif
32*b2055c35SXin Li 
// Clang version helpers.
//   LOCAL_CLANG_VERSION          compiler version packed as
//                                (major << 8) | minor.
//   LOCAL_CLANG_PREREQ(maj, min) non-zero when the compiler version is at
//                                least maj.min.
// Both evaluate to 0 when __clang__ is not defined.
#if defined(__clang__)
#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
#define LOCAL_CLANG_PREREQ(maj, min) \
  (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
#else
#define LOCAL_CLANG_VERSION 0
#define LOCAL_CLANG_PREREQ(maj, min) 0
#endif
41*b2055c35SXin Li 
// Fallback for compilers that do not provide __has_builtin: every query
// evaluates to 0, so '#if __has_builtin(...)' is always well-formed.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
45*b2055c35SXin Li 
46*b2055c35SXin Li //------------------------------------------------------------------------------
47*b2055c35SXin Li // x86 defines.
48*b2055c35SXin Li 
// Without a config.h, Visual C++ x86/x64 builds are detected from the
// compiler version. WEBP_MSC_SSE2 / WEBP_MSC_SSE41 are helpers local to this
// section and are #undef'd at its end.
#if !defined(HAVE_CONFIG_H)
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
    (defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
#endif

#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
    (defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
#endif
#endif

// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
// files without intrinsics, allowing the corresponding Init() to be called.
// Files containing intrinsics will need to be built targeting the instruction
// set so should succeed on one of the earlier tests.
//
// WEBP_USE_SSE2 is defined when the compiler targets SSE2 (__SSE2__ or the
// Visual C++ helper above) and, if config.h is in use, WEBP_HAVE_SSE2 was
// also set by the build configuration.
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
#define WEBP_USE_SSE2
#endif

// WEBP_USE_SSE2 implies WEBP_HAVE_SSE2.
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
#define WEBP_HAVE_SSE2
#endif

// Same scheme as above for SSE4.1.
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
#define WEBP_USE_SSE41
#endif

// WEBP_USE_SSE41 implies WEBP_HAVE_SSE41.
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
#define WEBP_HAVE_SSE41
#endif

// The Visual C++ helpers are not needed past this point.
#undef WEBP_MSC_SSE41
#undef WEBP_MSC_SSE2
86*b2055c35SXin Li //------------------------------------------------------------------------------
87*b2055c35SXin Li // Arm defines.
88*b2055c35SXin Li 
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
//
// WEBP_USE_NEON is defined when the compiler targets NEON (__ARM_NEON__) or
// AArch64, the build is not for Native Client and, if config.h is in use,
// WEBP_HAVE_NEON was also set by the build configuration.
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) &&       \
     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
    !defined(__native_client__)
#define WEBP_USE_NEON
#endif

// Android ARMv7-A builds with cpu-features.h available also enable NEON;
// WEBP_ANDROID_NEON marks that NEON was enabled through this path.
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
#define WEBP_ANDROID_NEON  // Android targets that may have NEON
#define WEBP_USE_NEON
#endif

// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
// vtbl4_u8(); a fix was made in 16.6.
#if defined(_MSC_VER) && \
    ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
     (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC))))
#define WEBP_USE_NEON
#define WEBP_USE_INTRINSICS
#endif

// WEBP_AARCH64 is always defined: 1 on 64-bit Arm targets, 0 otherwise, so it
// can be used directly in #if and C expressions.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define WEBP_AARCH64 1
#else
#define WEBP_AARCH64 0
#endif

// WEBP_USE_NEON implies WEBP_HAVE_NEON.
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
#define WEBP_HAVE_NEON
#endif
123*b2055c35SXin Li 
124*b2055c35SXin Li //------------------------------------------------------------------------------
125*b2055c35SXin Li // MIPS defines.
126*b2055c35SXin Li 
// 32-bit MIPS with ISA revision 1 to 5 enables WEBP_USE_MIPS32. Revision 2 or
// later additionally enables WEBP_USE_MIPS32_R2, and within that, DSP
// revision 2 support enables WEBP_USE_MIPS_DSP_R2.
#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
    (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
#define WEBP_USE_MIPS32
#if (__mips_isa_rev >= 2)
#define WEBP_USE_MIPS32_R2
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
#define WEBP_USE_MIPS_DSP_R2
#endif
#endif
#endif

// MSA is enabled when the compiler targets it and the ISA revision is >= 5.
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
#define WEBP_USE_MSA
#endif
141*b2055c35SXin Li 
142*b2055c35SXin Li //------------------------------------------------------------------------------
143*b2055c35SXin Li 
// WEBP_DSP_OMIT_C_CODE defaults to 1 unless the build overrides it.
#ifndef WEBP_DSP_OMIT_C_CODE
#define WEBP_DSP_OMIT_C_CODE 1
#endif

// WEBP_NEON_OMIT_C_CODE is always defined: 1 when NEON is in use and
// WEBP_DSP_OMIT_C_CODE is non-zero, 0 otherwise.
#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
#define WEBP_NEON_OMIT_C_CODE 1
#else
#define WEBP_NEON_OMIT_C_CODE 0
#endif
153*b2055c35SXin Li 
154*b2055c35SXin Li #if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
155*b2055c35SXin Li #define WEBP_NEON_WORK_AROUND_GCC 1
156*b2055c35SXin Li #else
157*b2055c35SXin Li #define WEBP_NEON_WORK_AROUND_GCC 0
158*b2055c35SXin Li #endif
159*b2055c35SXin Li 
160*b2055c35SXin Li //------------------------------------------------------------------------------
161*b2055c35SXin Li 
// This macro prevents thread_sanitizer from reporting known concurrent writes.
// It expands to nothing by default and to the no_sanitize_thread function
// attribute when the compiler reports the thread_sanitizer feature.
#define WEBP_TSAN_IGNORE_FUNCTION
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
#undef WEBP_TSAN_IGNORE_FUNCTION
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
#endif
#endif

// WEBP_MSAN is defined only when the compiler reports the memory_sanitizer
// feature.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define WEBP_MSAN
#endif
#endif
176*b2055c35SXin Li 
// WEBP_DSP_INIT(func) calls func() when the value of VP8GetCPUInfo differs
// from the one recorded on the previous invocation, then records the current
// value. The per-'func' static record starts out holding its own address
// cast to VP8CPUInfo, presumably so that it cannot match a real
// VP8GetCPUInfo value before func() has run once.
//
// With WEBP_USE_THREAD on non-Windows targets the check, the call and the
// update are performed while holding a per-'func' static pthread mutex; if
// the mutex cannot be locked the macro does nothing. The other variant
// performs no locking.
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>  // NOLINT

#define WEBP_DSP_INIT(func)                                         \
  do {                                                              \
    static volatile VP8CPUInfo func##_last_cpuinfo_used =           \
        (VP8CPUInfo)&func##_last_cpuinfo_used;                      \
    static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
    if (pthread_mutex_lock(&func##_lock)) break;                    \
    if (func##_last_cpuinfo_used != VP8GetCPUInfo) func();          \
    func##_last_cpuinfo_used = VP8GetCPUInfo;                       \
    (void)pthread_mutex_unlock(&func##_lock);                       \
  } while (0)
#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
#define WEBP_DSP_INIT(func)                               \
  do {                                                    \
    static volatile VP8CPUInfo func##_last_cpuinfo_used = \
        (VP8CPUInfo)&func##_last_cpuinfo_used;            \
    if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
    func();                                               \
    func##_last_cpuinfo_used = VP8GetCPUInfo;             \
  } while (0)
#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
200*b2055c35SXin Li 
// Defines an Init + helper function that control multiple initialization of
// function pointers / tables.
//
// WEBP_DSP_INIT_FUNC(name) expands to:
//   - a forward declaration of the static function name##_body(),
//   - the definition of name(), which runs name##_body() through
//     WEBP_DSP_INIT,
//   - the header of the name##_body() definition, whose braces and body are
//     supplied by the user of the macro.
// All three are marked with WEBP_TSAN_IGNORE_FUNCTION.
/* Usage:
   WEBP_DSP_INIT_FUNC(InitFunc) {
     ...function body
   }
*/
#define WEBP_DSP_INIT_FUNC(name)                                            \
  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void);                  \
  WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
212*b2055c35SXin Li 
// Both macros expand to nothing by default. With clang, when the no_sanitize
// attribute is available, they are redefined below as function attributes.
#define WEBP_UBSAN_IGNORE_UNDEF
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
#if defined(__clang__) && defined(__has_attribute)
#if __has_attribute(no_sanitize)
// This macro prevents the undefined behavior sanitizer from reporting
// failures. This is only meant to silence unaligned loads on platforms that
// are known to support them.
#undef WEBP_UBSAN_IGNORE_UNDEF
#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))

// This macro prevents the undefined behavior sanitizer from reporting
// failures related to unsigned integer overflows. This is only meant to
// silence cases where this well defined behavior is expected.
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
  __attribute__((no_sanitize("unsigned-integer-overflow")))
#endif
#endif
231*b2055c35SXin Li 
// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
// Note: 'ptr' is evaluated twice when it is not NULL, so it must not have
// side effects. The definition can be overridden by defining WEBP_OFFSET_PTR
// before this point.
#if !defined(WEBP_OFFSET_PTR)
#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
#endif
237*b2055c35SXin Li 
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility):
// after this point the macro always has a value, 0 unless the build already
// defined it.
#if !defined(WEBP_SWAP_16BIT_CSP)
#define WEBP_SWAP_16BIT_CSP 0
#endif
242*b2055c35SXin Li 
// Endianness fix-up (e.g. mips-gcc doesn't define __BIG_ENDIAN__): define
// WORDS_BIGENDIAN, unless the build already did, when any of the following
// holds: __BIG_ENDIAN__ is defined, _M_PPC is defined, or __BYTE_ORDER__
// equals __ORDER_BIG_ENDIAN__.
#if !defined(WORDS_BIGENDIAN) &&                   \
    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
#define WORDS_BIGENDIAN
#endif
249*b2055c35SXin Li 
// Identifiers for the CPU features that can be queried through a VP8CPUInfo
// callback. Enumerators take consecutive values starting at 0.
typedef enum {
  kSSE2,
  kSSE3,
  kSlowSSSE3,  // special feature for slow SSSE3 architectures
  kSSE4_1,
  kAVX,
  kAVX2,
  kNEON,
  kMIPS32,
  kMIPSdspR2,
  kMSA
} CPUFeature;

// Type of a CPU-feature query callback.
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
265*b2055c35SXin Li 
266*b2055c35SXin Li #endif  // WEBP_DSP_CPU_H_
267