1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // CPU detection
11 //
12 // Author: Christian Duvivier ([email protected])
13
14 #include "src/dsp/cpu.h"
15
16 #if defined(WEBP_HAVE_NEON_RTCD)
17 #include <stdio.h>
18 #include <string.h>
19 #endif
20
21 #if defined(WEBP_ANDROID_NEON)
22 #include <cpu-features.h>
23 #endif
24
25 //------------------------------------------------------------------------------
26 // SSE2 detection.
27 //
28
29 // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
30 #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
GetCPUInfo(int cpu_info[4],int info_type)31 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
32 __asm__ volatile (
33 "mov %%ebx, %%edi\n"
34 "cpuid\n"
35 "xchg %%edi, %%ebx\n"
36 : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
37 : "a"(info_type), "c"(0));
38 }
39 #elif defined(__i386__) || defined(__x86_64__)
GetCPUInfo(int cpu_info[4],int info_type)40 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
41 __asm__ volatile (
42 "cpuid\n"
43 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
44 : "a"(info_type), "c"(0));
45 }
46 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
47
// MSVC: implement GetCPUInfo() with the compiler's cpuid intrinsics.
// WEBP_HAVE_MSC_CPUID is defined whenever one of them is available; it is
// what enables the x86 detection code further down for MSVC builds.
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
#include <intrin.h>
// Preferred form: the third argument of __cpuidex is passed as 0 (ecx).
#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
#define WEBP_HAVE_MSC_CPUID
#elif _MSC_VER > 1310
#include <intrin.h>
// Older compilers: alias the two-argument __cpuid intrinsic directly.
// NOTE(review): no ecx value is supplied on this path -- confirm this is
// acceptable for the leaf-7 query made by the AVX2 check.
#define GetCPUInfo __cpuid
#define WEBP_HAVE_MSC_CPUID
#endif
57
58 #endif
59
60 // NaCl has no support for xgetbv or the raw opcode.
61 #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
xgetbv(void)62 static WEBP_INLINE uint64_t xgetbv(void) {
63 const uint32_t ecx = 0;
64 uint32_t eax, edx;
65 // Use the raw opcode for xgetbv for compatibility with older toolchains.
66 __asm__ volatile (
67 ".byte 0x0f, 0x01, 0xd0\n"
68 : "=a"(eax), "=d"(edx) : "c" (ecx));
69 return ((uint64_t)edx << 32) | eax;
70 }
71 #elif (defined(_M_X64) || defined(_M_IX86)) && \
72 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
#include <immintrin.h>
// Recent MSVC: use the _xgetbv intrinsic. The argument 0 mirrors the ecx = 0
// used by the inline-assembly variants of xgetbv() in this file.
#define xgetbv() _xgetbv(0)
75 #elif defined(_MSC_VER) && defined(_M_IX86)
// 32-bit MSVC fallback for compilers without the _xgetbv intrinsic.
// Executes xgetbv with ecx = 0 and returns the 64-bit result assembled from
// edx (upper 32 bits) and eax (lower 32 bits).
static WEBP_INLINE uint64_t xgetbv(void) {
  uint32_t eax_, edx_;
  __asm {
    xor ecx, ecx // ecx = 0
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
    // Copy the two result registers into the C locals.
    mov eax_, eax
    mov edx_, edx
  }
  // Combine the halves: edx is the high word, eax the low word.
  return ((uint64_t)edx_ << 32) | eax_;
}
87 #else
// Stub used when no xgetbv implementation is available: it always yields 0,
// so the (xgetbv() & 0x6) == 0x6 test made for kAVX can never succeed.
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
89 #endif
90
91 #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID)
92
// Run-time helper flagging platforms whose SSSE3 implementation is slow.
// 'info' is the eax value returned by cpuid leaf 1 (family/model signature).
// Returns 1 if the CPU is a family-6 part whose model number is in the table
// below, 0 otherwise.
static int CheckSlowModel(int info) {
  // Display models with longer latencies for the bsr instruction (ie 2 cycles
  // vs 10/16 cycles) and for some SSSE3 instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
    0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  const uint8_t* const table_end =
      kSlowModels + sizeof(kSlowModels) / sizeof(kSlowModels[0]);
  const uint8_t* entry;
  // Family lives in bits 8..11 of the signature.
  const uint32_t cpu_family = (info >> 8) & 0xf;
  // Model = extended model (bits 16..19) as the high nibble, combined with
  // the base model (bits 4..7) as the low nibble.
  const uint32_t cpu_model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);

  if (cpu_family != 0x06) return 0;
  for (entry = kSlowModels; entry != table_end; ++entry) {
    if (*entry == cpu_model) return 1;
  }
  return 0;
}
112
x86CPUInfo(CPUFeature feature)113 static int x86CPUInfo(CPUFeature feature) {
114 int max_cpuid_value;
115 int cpu_info[4];
116 int is_intel = 0;
117
118 // get the highest feature value cpuid supports
119 GetCPUInfo(cpu_info, 0);
120 max_cpuid_value = cpu_info[0];
121 if (max_cpuid_value < 1) {
122 return 0;
123 } else {
124 const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG
125 const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni
126 const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn
127 is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
128 cpu_info[2] == VENDOR_ID_INTEL_ECX &&
129 cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
130 }
131
132 GetCPUInfo(cpu_info, 1);
133 if (feature == kSSE2) {
134 return !!(cpu_info[3] & (1 << 26));
135 }
136 if (feature == kSSE3) {
137 return !!(cpu_info[2] & (1 << 0));
138 }
139 if (feature == kSlowSSSE3) {
140 if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
141 return CheckSlowModel(cpu_info[0]);
142 }
143 return 0;
144 }
145
146 if (feature == kSSE4_1) {
147 return !!(cpu_info[2] & (1 << 19));
148 }
149 if (feature == kAVX) {
150 // bits 27 (OSXSAVE) & 28 (256-bit AVX)
151 if ((cpu_info[2] & 0x18000000) == 0x18000000) {
152 // XMM state and YMM state enabled by the OS.
153 return (xgetbv() & 0x6) == 0x6;
154 }
155 }
156 if (feature == kAVX2) {
157 if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
158 GetCPUInfo(cpu_info, 7);
159 return !!(cpu_info[1] & (1 << 5));
160 }
161 }
162 return 0;
163 }
164 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
165 VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
166 #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
AndroidCPUInfo(CPUFeature feature)167 static int AndroidCPUInfo(CPUFeature feature) {
168 const AndroidCpuFamily cpu_family = android_getCpuFamily();
169 const uint64_t cpu_features = android_getCpuFeatures();
170 if (feature == kNEON) {
171 return cpu_family == ANDROID_CPU_FAMILY_ARM &&
172 (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
173 }
174 return 0;
175 }
176 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
177 VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
178 #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
179 // Use compile flags as an indicator of SIMD support instead of a runtime check.
wasmCPUInfo(CPUFeature feature)180 static int wasmCPUInfo(CPUFeature feature) {
181 switch (feature) {
182 #ifdef WEBP_HAVE_SSE2
183 case kSSE2:
184 return 1;
185 #endif
186 #ifdef WEBP_HAVE_SSE41
187 case kSSE3:
188 case kSlowSSSE3:
189 case kSSE4_1:
190 return 1;
191 #endif
192 #ifdef WEBP_HAVE_NEON
193 case kNEON:
194 return 1;
195 #endif
196 default:
197 break;
198 }
199 return 0;
200 }
201 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
202 VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
203 #elif defined(WEBP_HAVE_NEON)
204 // In most cases this function doesn't check for NEON support (it's assumed by
205 // the configuration), but enables turning off NEON at runtime, for testing
206 // purposes, by setting VP8GetCPUInfo = NULL.
armCPUInfo(CPUFeature feature)207 static int armCPUInfo(CPUFeature feature) {
208 if (feature != kNEON) return 0;
209 #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
210 {
211 int has_neon = 0;
212 char line[200];
213 FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
214 if (cpuinfo == NULL) return 0;
215 while (fgets(line, sizeof(line), cpuinfo)) {
216 if (!strncmp(line, "Features", 8)) {
217 if (strstr(line, " neon ") != NULL) {
218 has_neon = 1;
219 break;
220 }
221 }
222 }
223 fclose(cpuinfo);
224 return has_neon;
225 }
226 #else
227 return 1;
228 #endif
229 }
230 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
231 VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
232 #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
233 defined(WEBP_USE_MSA)
mipsCPUInfo(CPUFeature feature)234 static int mipsCPUInfo(CPUFeature feature) {
235 if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
236 return 1;
237 } else {
238 return 0;
239 }
240
241 }
242 WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
243 VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
244 #else
// No detector is available for this target: the hook is left as NULL.
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = NULL;
247 #endif
248