1*b2055c35SXin Li // Copyright 2022 Google Inc. All Rights Reserved. 2*b2055c35SXin Li // 3*b2055c35SXin Li // Use of this source code is governed by a BSD-style license 4*b2055c35SXin Li // that can be found in the COPYING file in the root of the source 5*b2055c35SXin Li // tree. An additional intellectual property rights grant can be found 6*b2055c35SXin Li // in the file PATENTS. All contributing project authors may 7*b2055c35SXin Li // be found in the AUTHORS file in the root of the source tree. 8*b2055c35SXin Li // ----------------------------------------------------------------------------- 9*b2055c35SXin Li // 10*b2055c35SXin Li // CPU detection functions and macros. 11*b2055c35SXin Li // 12*b2055c35SXin Li // Author: Skal ([email protected]) 13*b2055c35SXin Li 14*b2055c35SXin Li #ifndef WEBP_DSP_CPU_H_ 15*b2055c35SXin Li #define WEBP_DSP_CPU_H_ 16*b2055c35SXin Li 17*b2055c35SXin Li #include <stddef.h> 18*b2055c35SXin Li 19*b2055c35SXin Li #ifdef HAVE_CONFIG_H 20*b2055c35SXin Li #include "src/webp/config.h" 21*b2055c35SXin Li #endif 22*b2055c35SXin Li 23*b2055c35SXin Li #include "src/webp/types.h" 24*b2055c35SXin Li 25*b2055c35SXin Li #if defined(__GNUC__) 26*b2055c35SXin Li #define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) 27*b2055c35SXin Li #define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) 28*b2055c35SXin Li #else 29*b2055c35SXin Li #define LOCAL_GCC_VERSION 0 30*b2055c35SXin Li #define LOCAL_GCC_PREREQ(maj, min) 0 31*b2055c35SXin Li #endif 32*b2055c35SXin Li 33*b2055c35SXin Li #if defined(__clang__) 34*b2055c35SXin Li #define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) 35*b2055c35SXin Li #define LOCAL_CLANG_PREREQ(maj, min) \ 36*b2055c35SXin Li (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) 37*b2055c35SXin Li #else 38*b2055c35SXin Li #define LOCAL_CLANG_VERSION 0 39*b2055c35SXin Li #define LOCAL_CLANG_PREREQ(maj, min) 0 40*b2055c35SXin Li #endif 41*b2055c35SXin Li 42*b2055c35SXin Li #ifndef __has_builtin 43*b2055c35SXin Li #define __has_builtin(x) 0 44*b2055c35SXin Li #endif 45*b2055c35SXin Li 46*b2055c35SXin Li //------------------------------------------------------------------------------ 47*b2055c35SXin Li // x86 defines. 48*b2055c35SXin Li 49*b2055c35SXin Li #if !defined(HAVE_CONFIG_H) 50*b2055c35SXin Li #if defined(_MSC_VER) && _MSC_VER > 1310 && \ 51*b2055c35SXin Li (defined(_M_X64) || defined(_M_IX86)) 52*b2055c35SXin Li #define WEBP_MSC_SSE2 // Visual C++ SSE2 targets 53*b2055c35SXin Li #endif 54*b2055c35SXin Li 55*b2055c35SXin Li #if defined(_MSC_VER) && _MSC_VER >= 1500 && \ 56*b2055c35SXin Li (defined(_M_X64) || defined(_M_IX86)) 57*b2055c35SXin Li #define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets 58*b2055c35SXin Li #endif 59*b2055c35SXin Li #endif 60*b2055c35SXin Li 61*b2055c35SXin Li // WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp 62*b2055c35SXin Li // files without intrinsics, allowing the corresponding Init() to be called. 63*b2055c35SXin Li // Files containing intrinsics will need to be built targeting the instruction 64*b2055c35SXin Li // set so should succeed on one of the earlier tests. 65*b2055c35SXin Li #if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \ 66*b2055c35SXin Li (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2)) 67*b2055c35SXin Li #define WEBP_USE_SSE2 68*b2055c35SXin Li #endif 69*b2055c35SXin Li 70*b2055c35SXin Li #if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2) 71*b2055c35SXin Li #define WEBP_HAVE_SSE2 72*b2055c35SXin Li #endif 73*b2055c35SXin Li 74*b2055c35SXin Li #if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \ 75*b2055c35SXin Li (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41)) 76*b2055c35SXin Li #define WEBP_USE_SSE41 77*b2055c35SXin Li #endif 78*b2055c35SXin Li 79*b2055c35SXin Li #if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41) 80*b2055c35SXin Li #define WEBP_HAVE_SSE41 81*b2055c35SXin Li #endif 82*b2055c35SXin Li 83*b2055c35SXin Li #undef WEBP_MSC_SSE41 84*b2055c35SXin Li #undef WEBP_MSC_SSE2 85*b2055c35SXin Li 86*b2055c35SXin Li //------------------------------------------------------------------------------ 87*b2055c35SXin Li // Arm defines. 88*b2055c35SXin Li 89*b2055c35SXin Li // The intrinsics currently cause compiler errors with arm-nacl-gcc and the 90*b2055c35SXin Li // inline assembly would need to be modified for use with Native Client. 91*b2055c35SXin Li #if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ 92*b2055c35SXin Li (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \ 93*b2055c35SXin Li !defined(__native_client__) 94*b2055c35SXin Li #define WEBP_USE_NEON 95*b2055c35SXin Li #endif 96*b2055c35SXin Li 97*b2055c35SXin Li #if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \ 98*b2055c35SXin Li defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H) 99*b2055c35SXin Li #define WEBP_ANDROID_NEON // Android targets that may have NEON 100*b2055c35SXin Li #define WEBP_USE_NEON 101*b2055c35SXin Li #endif 102*b2055c35SXin Li 103*b2055c35SXin Li // Note: ARM64 is supported in Visual Studio 2017, but requires the direct 104*b2055c35SXin Li // inclusion of arm64_neon.h; Visual Studio 2019 includes this file in 105*b2055c35SXin Li // arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with 106*b2055c35SXin Li // vtbl4_u8(); a fix was made in 16.6. 107*b2055c35SXin Li #if defined(_MSC_VER) && \ 108*b2055c35SXin Li ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ 109*b2055c35SXin Li (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC)))) 110*b2055c35SXin Li #define WEBP_USE_NEON 111*b2055c35SXin Li #define WEBP_USE_INTRINSICS 112*b2055c35SXin Li #endif 113*b2055c35SXin Li 114*b2055c35SXin Li #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) 115*b2055c35SXin Li #define WEBP_AARCH64 1 116*b2055c35SXin Li #else 117*b2055c35SXin Li #define WEBP_AARCH64 0 118*b2055c35SXin Li #endif 119*b2055c35SXin Li 120*b2055c35SXin Li #if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) 121*b2055c35SXin Li #define WEBP_HAVE_NEON 122*b2055c35SXin Li #endif 123*b2055c35SXin Li 124*b2055c35SXin Li //------------------------------------------------------------------------------ 125*b2055c35SXin Li // MIPS defines. 126*b2055c35SXin Li 127*b2055c35SXin Li #if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \ 128*b2055c35SXin Li (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) 129*b2055c35SXin Li #define WEBP_USE_MIPS32 130*b2055c35SXin Li #if (__mips_isa_rev >= 2) 131*b2055c35SXin Li #define WEBP_USE_MIPS32_R2 132*b2055c35SXin Li #if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2) 133*b2055c35SXin Li #define WEBP_USE_MIPS_DSP_R2 134*b2055c35SXin Li #endif 135*b2055c35SXin Li #endif 136*b2055c35SXin Li #endif 137*b2055c35SXin Li 138*b2055c35SXin Li #if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) 139*b2055c35SXin Li #define WEBP_USE_MSA 140*b2055c35SXin Li #endif 141*b2055c35SXin Li 142*b2055c35SXin Li //------------------------------------------------------------------------------ 143*b2055c35SXin Li 144*b2055c35SXin Li #ifndef WEBP_DSP_OMIT_C_CODE 145*b2055c35SXin Li #define WEBP_DSP_OMIT_C_CODE 1 146*b2055c35SXin Li #endif 147*b2055c35SXin Li 148*b2055c35SXin Li #if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE 149*b2055c35SXin Li #define WEBP_NEON_OMIT_C_CODE 1 150*b2055c35SXin Li #else 151*b2055c35SXin Li #define WEBP_NEON_OMIT_C_CODE 0 152*b2055c35SXin Li #endif 153*b2055c35SXin Li 154*b2055c35SXin Li #if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64) 155*b2055c35SXin Li #define WEBP_NEON_WORK_AROUND_GCC 1 156*b2055c35SXin Li #else 157*b2055c35SXin Li #define WEBP_NEON_WORK_AROUND_GCC 0 158*b2055c35SXin Li #endif 159*b2055c35SXin Li 160*b2055c35SXin Li //------------------------------------------------------------------------------ 161*b2055c35SXin Li 162*b2055c35SXin Li // This macro prevents thread_sanitizer from reporting known concurrent writes. 163*b2055c35SXin Li #define WEBP_TSAN_IGNORE_FUNCTION 164*b2055c35SXin Li #if defined(__has_feature) 165*b2055c35SXin Li #if __has_feature(thread_sanitizer) 166*b2055c35SXin Li #undef WEBP_TSAN_IGNORE_FUNCTION 167*b2055c35SXin Li #define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) 168*b2055c35SXin Li #endif 169*b2055c35SXin Li #endif 170*b2055c35SXin Li 171*b2055c35SXin Li #if defined(__has_feature) 172*b2055c35SXin Li #if __has_feature(memory_sanitizer) 173*b2055c35SXin Li #define WEBP_MSAN 174*b2055c35SXin Li #endif 175*b2055c35SXin Li #endif 176*b2055c35SXin Li 177*b2055c35SXin Li #if defined(WEBP_USE_THREAD) && !defined(_WIN32) 178*b2055c35SXin Li #include <pthread.h> // NOLINT 179*b2055c35SXin Li 180*b2055c35SXin Li #define WEBP_DSP_INIT(func) \ 181*b2055c35SXin Li do { \ 182*b2055c35SXin Li static volatile VP8CPUInfo func##_last_cpuinfo_used = \ 183*b2055c35SXin Li (VP8CPUInfo)&func##_last_cpuinfo_used; \ 184*b2055c35SXin Li static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \ 185*b2055c35SXin Li if (pthread_mutex_lock(&func##_lock)) break; \ 186*b2055c35SXin Li if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \ 187*b2055c35SXin Li func##_last_cpuinfo_used = VP8GetCPUInfo; \ 188*b2055c35SXin Li (void)pthread_mutex_unlock(&func##_lock); \ 189*b2055c35SXin Li } while (0) 190*b2055c35SXin Li #else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) 191*b2055c35SXin Li #define WEBP_DSP_INIT(func) \ 192*b2055c35SXin Li do { \ 193*b2055c35SXin Li static volatile VP8CPUInfo func##_last_cpuinfo_used = \ 194*b2055c35SXin Li (VP8CPUInfo)&func##_last_cpuinfo_used; \ 195*b2055c35SXin Li if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \ 196*b2055c35SXin Li func(); \ 197*b2055c35SXin Li func##_last_cpuinfo_used = VP8GetCPUInfo; \ 198*b2055c35SXin Li } while (0) 199*b2055c35SXin Li #endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) 200*b2055c35SXin Li 201*b2055c35SXin Li // Defines an Init + helper function that control multiple initialization of 202*b2055c35SXin Li // function pointers / tables. 203*b2055c35SXin Li /* Usage: 204*b2055c35SXin Li WEBP_DSP_INIT_FUNC(InitFunc) { 205*b2055c35SXin Li ...function body 206*b2055c35SXin Li } 207*b2055c35SXin Li */ 208*b2055c35SXin Li #define WEBP_DSP_INIT_FUNC(name) \ 209*b2055c35SXin Li static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \ 210*b2055c35SXin Li WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \ 211*b2055c35SXin Li static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void) 212*b2055c35SXin Li 213*b2055c35SXin Li #define WEBP_UBSAN_IGNORE_UNDEF 214*b2055c35SXin Li #define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW 215*b2055c35SXin Li #if defined(__clang__) && defined(__has_attribute) 216*b2055c35SXin Li #if __has_attribute(no_sanitize) 217*b2055c35SXin Li // This macro prevents the undefined behavior sanitizer from reporting 218*b2055c35SXin Li // failures. This is only meant to silence unaligned loads on platforms that 219*b2055c35SXin Li // are known to support them. 220*b2055c35SXin Li #undef WEBP_UBSAN_IGNORE_UNDEF 221*b2055c35SXin Li #define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined"))) 222*b2055c35SXin Li 223*b2055c35SXin Li // This macro prevents the undefined behavior sanitizer from reporting 224*b2055c35SXin Li // failures related to unsigned integer overflows. This is only meant to 225*b2055c35SXin Li // silence cases where this well defined behavior is expected. 226*b2055c35SXin Li #undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW 227*b2055c35SXin Li #define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ 228*b2055c35SXin Li __attribute__((no_sanitize("unsigned-integer-overflow"))) 229*b2055c35SXin Li #endif 230*b2055c35SXin Li #endif 231*b2055c35SXin Li 232*b2055c35SXin Li // If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. 233*b2055c35SXin Li // Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. 234*b2055c35SXin Li #if !defined(WEBP_OFFSET_PTR) 235*b2055c35SXin Li #define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) 236*b2055c35SXin Li #endif 237*b2055c35SXin Li 238*b2055c35SXin Li // Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) 239*b2055c35SXin Li #if !defined(WEBP_SWAP_16BIT_CSP) 240*b2055c35SXin Li #define WEBP_SWAP_16BIT_CSP 0 241*b2055c35SXin Li #endif 242*b2055c35SXin Li 243*b2055c35SXin Li // some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) 244*b2055c35SXin Li #if !defined(WORDS_BIGENDIAN) && \ 245*b2055c35SXin Li (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ 246*b2055c35SXin Li (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) 247*b2055c35SXin Li #define WORDS_BIGENDIAN 248*b2055c35SXin Li #endif 249*b2055c35SXin Li 250*b2055c35SXin Li typedef enum { 251*b2055c35SXin Li kSSE2, 252*b2055c35SXin Li kSSE3, 253*b2055c35SXin Li kSlowSSSE3, // special feature for slow SSSE3 architectures 254*b2055c35SXin Li kSSE4_1, 255*b2055c35SXin Li kAVX, 256*b2055c35SXin Li kAVX2, 257*b2055c35SXin Li kNEON, 258*b2055c35SXin Li kMIPS32, 259*b2055c35SXin Li kMIPSdspR2, 260*b2055c35SXin Li kMSA 261*b2055c35SXin Li } CPUFeature; 262*b2055c35SXin Li 263*b2055c35SXin Li // returns true if the CPU supports the feature. 264*b2055c35SXin Li typedef int (*VP8CPUInfo)(CPUFeature feature); 265*b2055c35SXin Li 266*b2055c35SXin Li #endif // WEBP_DSP_CPU_H_ 267