xref: /aosp_15_r20/external/webp/src/dsp/cpu.c (revision b2055c353e87c8814eb2b6b1b11112a1562253bd)
1*b2055c35SXin Li // Copyright 2011 Google Inc. All Rights Reserved.
2*b2055c35SXin Li //
3*b2055c35SXin Li // Use of this source code is governed by a BSD-style license
4*b2055c35SXin Li // that can be found in the COPYING file in the root of the source
5*b2055c35SXin Li // tree. An additional intellectual property rights grant can be found
6*b2055c35SXin Li // in the file PATENTS. All contributing project authors may
7*b2055c35SXin Li // be found in the AUTHORS file in the root of the source tree.
8*b2055c35SXin Li // -----------------------------------------------------------------------------
9*b2055c35SXin Li //
10*b2055c35SXin Li // CPU detection
11*b2055c35SXin Li //
12*b2055c35SXin Li // Author: Christian Duvivier ([email protected])
13*b2055c35SXin Li 
14*b2055c35SXin Li #include "src/dsp/cpu.h"
15*b2055c35SXin Li 
16*b2055c35SXin Li #if defined(WEBP_HAVE_NEON_RTCD)
17*b2055c35SXin Li #include <stdio.h>
18*b2055c35SXin Li #include <string.h>
19*b2055c35SXin Li #endif
20*b2055c35SXin Li 
21*b2055c35SXin Li #if defined(WEBP_ANDROID_NEON)
22*b2055c35SXin Li #include <cpu-features.h>
23*b2055c35SXin Li #endif
24*b2055c35SXin Li 
25*b2055c35SXin Li //------------------------------------------------------------------------------
26*b2055c35SXin Li // SSE2 detection.
27*b2055c35SXin Li //
28*b2055c35SXin Li 
29*b2055c35SXin Li // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
30*b2055c35SXin Li #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
GetCPUInfo(int cpu_info[4],int info_type)31*b2055c35SXin Li static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
32*b2055c35SXin Li   __asm__ volatile (
33*b2055c35SXin Li     "mov %%ebx, %%edi\n"
34*b2055c35SXin Li     "cpuid\n"
35*b2055c35SXin Li     "xchg %%edi, %%ebx\n"
36*b2055c35SXin Li     : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
37*b2055c35SXin Li     : "a"(info_type), "c"(0));
38*b2055c35SXin Li }
39*b2055c35SXin Li #elif defined(__i386__) || defined(__x86_64__)
GetCPUInfo(int cpu_info[4],int info_type)40*b2055c35SXin Li static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
41*b2055c35SXin Li   __asm__ volatile (
42*b2055c35SXin Li     "cpuid\n"
43*b2055c35SXin Li     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
44*b2055c35SXin Li     : "a"(info_type), "c"(0));
45*b2055c35SXin Li }
46*b2055c35SXin Li #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
47*b2055c35SXin Li 
48*b2055c35SXin Li #if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
49*b2055c35SXin Li #include <intrin.h>
50*b2055c35SXin Li #define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
51*b2055c35SXin Li #define WEBP_HAVE_MSC_CPUID
52*b2055c35SXin Li #elif _MSC_VER > 1310
53*b2055c35SXin Li #include <intrin.h>
54*b2055c35SXin Li #define GetCPUInfo __cpuid
55*b2055c35SXin Li #define WEBP_HAVE_MSC_CPUID
56*b2055c35SXin Li #endif
57*b2055c35SXin Li 
58*b2055c35SXin Li #endif
59*b2055c35SXin Li 
60*b2055c35SXin Li // NaCl has no support for xgetbv or the raw opcode.
61*b2055c35SXin Li #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
xgetbv(void)62*b2055c35SXin Li static WEBP_INLINE uint64_t xgetbv(void) {
63*b2055c35SXin Li   const uint32_t ecx = 0;
64*b2055c35SXin Li   uint32_t eax, edx;
65*b2055c35SXin Li   // Use the raw opcode for xgetbv for compatibility with older toolchains.
66*b2055c35SXin Li   __asm__ volatile (
67*b2055c35SXin Li     ".byte 0x0f, 0x01, 0xd0\n"
68*b2055c35SXin Li     : "=a"(eax), "=d"(edx) : "c" (ecx));
69*b2055c35SXin Li   return ((uint64_t)edx << 32) | eax;
70*b2055c35SXin Li }
71*b2055c35SXin Li #elif (defined(_M_X64) || defined(_M_IX86)) && \
72*b2055c35SXin Li       defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
73*b2055c35SXin Li #include <immintrin.h>
74*b2055c35SXin Li #define xgetbv() _xgetbv(0)
75*b2055c35SXin Li #elif defined(_MSC_VER) && defined(_M_IX86)
xgetbv(void)76*b2055c35SXin Li static WEBP_INLINE uint64_t xgetbv(void) {
77*b2055c35SXin Li   uint32_t eax_, edx_;
78*b2055c35SXin Li   __asm {
79*b2055c35SXin Li     xor ecx, ecx  // ecx = 0
80*b2055c35SXin Li     // Use the raw opcode for xgetbv for compatibility with older toolchains.
81*b2055c35SXin Li     __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
82*b2055c35SXin Li     mov eax_, eax
83*b2055c35SXin Li     mov edx_, edx
84*b2055c35SXin Li   }
85*b2055c35SXin Li   return ((uint64_t)edx_ << 32) | eax_;
86*b2055c35SXin Li }
87*b2055c35SXin Li #else
88*b2055c35SXin Li #define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
89*b2055c35SXin Li #endif
90*b2055c35SXin Li 
91*b2055c35SXin Li #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID)
92*b2055c35SXin Li 
// Helper for run-time detection of slow SSSE3 platforms.
// 'info' is the eax value produced by GetCPUInfo(cpu_info, 1). Returns 1 when
// the family field (bits 8..11) is 6 and the 8-bit model, built from bits
// 16..19 (high nibble) and bits 4..7 (low nibble), is in the table below;
// returns 0 otherwise.
static int CheckSlowModel(int info) {
  // Table listing display models with longer latencies for the bsr instruction
  // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
    0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  // Extract the bit fields from an unsigned copy: right-shifting a negative
  // signed int is implementation-defined, an unsigned shift is not.
  const uint32_t signature = (uint32_t)info;
  const uint32_t model = ((signature >> 12) & 0xf0) | ((signature >> 4) & 0xf);
  const uint32_t family = (signature >> 8) & 0xf;
  size_t i;

  if (family != 0x06) return 0;
  for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
    if (model == kSlowModels[i]) return 1;
  }
  return 0;
}
112*b2055c35SXin Li 
x86CPUInfo(CPUFeature feature)113*b2055c35SXin Li static int x86CPUInfo(CPUFeature feature) {
114*b2055c35SXin Li   int max_cpuid_value;
115*b2055c35SXin Li   int cpu_info[4];
116*b2055c35SXin Li   int is_intel = 0;
117*b2055c35SXin Li 
118*b2055c35SXin Li   // get the highest feature value cpuid supports
119*b2055c35SXin Li   GetCPUInfo(cpu_info, 0);
120*b2055c35SXin Li   max_cpuid_value = cpu_info[0];
121*b2055c35SXin Li   if (max_cpuid_value < 1) {
122*b2055c35SXin Li     return 0;
123*b2055c35SXin Li   } else {
124*b2055c35SXin Li     const int VENDOR_ID_INTEL_EBX = 0x756e6547;  // uneG
125*b2055c35SXin Li     const int VENDOR_ID_INTEL_EDX = 0x49656e69;  // Ieni
126*b2055c35SXin Li     const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn
127*b2055c35SXin Li     is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
128*b2055c35SXin Li                 cpu_info[2] == VENDOR_ID_INTEL_ECX &&
129*b2055c35SXin Li                 cpu_info[3] == VENDOR_ID_INTEL_EDX);    // genuine Intel?
130*b2055c35SXin Li   }
131*b2055c35SXin Li 
132*b2055c35SXin Li   GetCPUInfo(cpu_info, 1);
133*b2055c35SXin Li   if (feature == kSSE2) {
134*b2055c35SXin Li     return !!(cpu_info[3] & (1 << 26));
135*b2055c35SXin Li   }
136*b2055c35SXin Li   if (feature == kSSE3) {
137*b2055c35SXin Li     return !!(cpu_info[2] & (1 << 0));
138*b2055c35SXin Li   }
139*b2055c35SXin Li   if (feature == kSlowSSSE3) {
140*b2055c35SXin Li     if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
141*b2055c35SXin Li       return CheckSlowModel(cpu_info[0]);
142*b2055c35SXin Li     }
143*b2055c35SXin Li     return 0;
144*b2055c35SXin Li   }
145*b2055c35SXin Li 
146*b2055c35SXin Li   if (feature == kSSE4_1) {
147*b2055c35SXin Li     return !!(cpu_info[2] & (1 << 19));
148*b2055c35SXin Li   }
149*b2055c35SXin Li   if (feature == kAVX) {
150*b2055c35SXin Li     // bits 27 (OSXSAVE) & 28 (256-bit AVX)
151*b2055c35SXin Li     if ((cpu_info[2] & 0x18000000) == 0x18000000) {
152*b2055c35SXin Li       // XMM state and YMM state enabled by the OS.
153*b2055c35SXin Li       return (xgetbv() & 0x6) == 0x6;
154*b2055c35SXin Li     }
155*b2055c35SXin Li   }
156*b2055c35SXin Li   if (feature == kAVX2) {
157*b2055c35SXin Li     if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
158*b2055c35SXin Li       GetCPUInfo(cpu_info, 7);
159*b2055c35SXin Li       return !!(cpu_info[1] & (1 << 5));
160*b2055c35SXin Li     }
161*b2055c35SXin Li   }
162*b2055c35SXin Li   return 0;
163*b2055c35SXin Li }
164*b2055c35SXin Li WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
165*b2055c35SXin Li VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
166*b2055c35SXin Li #elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
AndroidCPUInfo(CPUFeature feature)167*b2055c35SXin Li static int AndroidCPUInfo(CPUFeature feature) {
168*b2055c35SXin Li   const AndroidCpuFamily cpu_family = android_getCpuFamily();
169*b2055c35SXin Li   const uint64_t cpu_features = android_getCpuFeatures();
170*b2055c35SXin Li   if (feature == kNEON) {
171*b2055c35SXin Li     return cpu_family == ANDROID_CPU_FAMILY_ARM &&
172*b2055c35SXin Li            (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
173*b2055c35SXin Li   }
174*b2055c35SXin Li   return 0;
175*b2055c35SXin Li }
176*b2055c35SXin Li WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
177*b2055c35SXin Li VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
178*b2055c35SXin Li #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
179*b2055c35SXin Li // Use compile flags as an indicator of SIMD support instead of a runtime check.
wasmCPUInfo(CPUFeature feature)180*b2055c35SXin Li static int wasmCPUInfo(CPUFeature feature) {
181*b2055c35SXin Li   switch (feature) {
182*b2055c35SXin Li #ifdef WEBP_HAVE_SSE2
183*b2055c35SXin Li     case kSSE2:
184*b2055c35SXin Li       return 1;
185*b2055c35SXin Li #endif
186*b2055c35SXin Li #ifdef WEBP_HAVE_SSE41
187*b2055c35SXin Li     case kSSE3:
188*b2055c35SXin Li     case kSlowSSSE3:
189*b2055c35SXin Li     case kSSE4_1:
190*b2055c35SXin Li       return 1;
191*b2055c35SXin Li #endif
192*b2055c35SXin Li #ifdef WEBP_HAVE_NEON
193*b2055c35SXin Li     case kNEON:
194*b2055c35SXin Li       return 1;
195*b2055c35SXin Li #endif
196*b2055c35SXin Li     default:
197*b2055c35SXin Li       break;
198*b2055c35SXin Li   }
199*b2055c35SXin Li   return 0;
200*b2055c35SXin Li }
201*b2055c35SXin Li WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
202*b2055c35SXin Li VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
203*b2055c35SXin Li #elif defined(WEBP_HAVE_NEON)
204*b2055c35SXin Li // In most cases this function doesn't check for NEON support (it's assumed by
205*b2055c35SXin Li // the configuration), but enables turning off NEON at runtime, for testing
206*b2055c35SXin Li // purposes, by setting VP8GetCPUInfo = NULL.
armCPUInfo(CPUFeature feature)207*b2055c35SXin Li static int armCPUInfo(CPUFeature feature) {
208*b2055c35SXin Li   if (feature != kNEON) return 0;
209*b2055c35SXin Li #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
210*b2055c35SXin Li   {
211*b2055c35SXin Li     int has_neon = 0;
212*b2055c35SXin Li     char line[200];
213*b2055c35SXin Li     FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
214*b2055c35SXin Li     if (cpuinfo == NULL) return 0;
215*b2055c35SXin Li     while (fgets(line, sizeof(line), cpuinfo)) {
216*b2055c35SXin Li       if (!strncmp(line, "Features", 8)) {
217*b2055c35SXin Li         if (strstr(line, " neon ") != NULL) {
218*b2055c35SXin Li           has_neon = 1;
219*b2055c35SXin Li           break;
220*b2055c35SXin Li         }
221*b2055c35SXin Li       }
222*b2055c35SXin Li     }
223*b2055c35SXin Li     fclose(cpuinfo);
224*b2055c35SXin Li     return has_neon;
225*b2055c35SXin Li   }
226*b2055c35SXin Li #else
227*b2055c35SXin Li   return 1;
228*b2055c35SXin Li #endif
229*b2055c35SXin Li }
230*b2055c35SXin Li WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
231*b2055c35SXin Li VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
232*b2055c35SXin Li #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
233*b2055c35SXin Li       defined(WEBP_USE_MSA)
mipsCPUInfo(CPUFeature feature)234*b2055c35SXin Li static int mipsCPUInfo(CPUFeature feature) {
235*b2055c35SXin Li   if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
236*b2055c35SXin Li     return 1;
237*b2055c35SXin Li   } else {
238*b2055c35SXin Li     return 0;
239*b2055c35SXin Li   }
240*b2055c35SXin Li 
241*b2055c35SXin Li }
242*b2055c35SXin Li WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
243*b2055c35SXin Li VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
244*b2055c35SXin Li #else
// Fallback when none of the platform branches above applies: no detection
// routine is installed and VP8GetCPUInfo is left NULL.
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = NULL;
247*b2055c35SXin Li #endif
248