xref: /aosp_15_r20/external/mesa3d/src/util/u_cpu_detect.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /**************************************************************************
2*61046927SAndroid Build Coastguard Worker  *
3*61046927SAndroid Build Coastguard Worker  * Copyright 2008 Dennis Smit
4*61046927SAndroid Build Coastguard Worker  * All Rights Reserved.
5*61046927SAndroid Build Coastguard Worker  *
6*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
7*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
8*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
9*61046927SAndroid Build Coastguard Worker  * on the rights to use, copy, modify, merge, publish, distribute, sub
10*61046927SAndroid Build Coastguard Worker  * license, and/or sell copies of the Software, and to permit persons to whom
11*61046927SAndroid Build Coastguard Worker  * the Software is furnished to do so, subject to the following conditions:
12*61046927SAndroid Build Coastguard Worker  *
13*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
14*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
15*61046927SAndroid Build Coastguard Worker  * Software.
16*61046927SAndroid Build Coastguard Worker  *
17*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
20*61046927SAndroid Build Coastguard Worker  * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21*61046927SAndroid Build Coastguard Worker  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22*61046927SAndroid Build Coastguard Worker  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23*61046927SAndroid Build Coastguard Worker  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24*61046927SAndroid Build Coastguard Worker  *
25*61046927SAndroid Build Coastguard Worker  **************************************************************************/
26*61046927SAndroid Build Coastguard Worker 
27*61046927SAndroid Build Coastguard Worker /**
28*61046927SAndroid Build Coastguard Worker  * @file
29*61046927SAndroid Build Coastguard Worker  * CPU feature detection.
30*61046927SAndroid Build Coastguard Worker  *
31*61046927SAndroid Build Coastguard Worker  * @author Dennis Smit
32*61046927SAndroid Build Coastguard Worker  * @author Based on the work of Eric Anholt <[email protected]>
33*61046927SAndroid Build Coastguard Worker  */
34*61046927SAndroid Build Coastguard Worker 
35*61046927SAndroid Build Coastguard Worker #include "util/detect.h"
36*61046927SAndroid Build Coastguard Worker #include "util/compiler.h"
37*61046927SAndroid Build Coastguard Worker 
38*61046927SAndroid Build Coastguard Worker #include "util/u_debug.h"
39*61046927SAndroid Build Coastguard Worker #include "u_cpu_detect.h"
40*61046927SAndroid Build Coastguard Worker #include "u_math.h"
41*61046927SAndroid Build Coastguard Worker #include "os_file.h"
42*61046927SAndroid Build Coastguard Worker #include "c11/threads.h"
43*61046927SAndroid Build Coastguard Worker 
44*61046927SAndroid Build Coastguard Worker #include <stdio.h>
45*61046927SAndroid Build Coastguard Worker #include <inttypes.h>
46*61046927SAndroid Build Coastguard Worker 
47*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_PPC
48*61046927SAndroid Build Coastguard Worker #if DETECT_OS_APPLE
49*61046927SAndroid Build Coastguard Worker #include <sys/sysctl.h>
50*61046927SAndroid Build Coastguard Worker #else
51*61046927SAndroid Build Coastguard Worker #include <signal.h>
52*61046927SAndroid Build Coastguard Worker #include <setjmp.h>
53*61046927SAndroid Build Coastguard Worker #endif
54*61046927SAndroid Build Coastguard Worker #endif
55*61046927SAndroid Build Coastguard Worker 
56*61046927SAndroid Build Coastguard Worker #if DETECT_OS_BSD
57*61046927SAndroid Build Coastguard Worker #include <sys/param.h>
58*61046927SAndroid Build Coastguard Worker #include <sys/sysctl.h>
59*61046927SAndroid Build Coastguard Worker #include <machine/cpu.h>
60*61046927SAndroid Build Coastguard Worker #endif
61*61046927SAndroid Build Coastguard Worker 
62*61046927SAndroid Build Coastguard Worker #if DETECT_OS_FREEBSD
63*61046927SAndroid Build Coastguard Worker #if __has_include(<sys/auxv.h>)
64*61046927SAndroid Build Coastguard Worker #include <sys/auxv.h>
65*61046927SAndroid Build Coastguard Worker #define HAVE_ELF_AUX_INFO
66*61046927SAndroid Build Coastguard Worker #endif
67*61046927SAndroid Build Coastguard Worker #endif
68*61046927SAndroid Build Coastguard Worker 
69*61046927SAndroid Build Coastguard Worker #if DETECT_OS_LINUX
70*61046927SAndroid Build Coastguard Worker #include <signal.h>
71*61046927SAndroid Build Coastguard Worker #include <fcntl.h>
72*61046927SAndroid Build Coastguard Worker #include <elf.h>
73*61046927SAndroid Build Coastguard Worker #endif
74*61046927SAndroid Build Coastguard Worker 
75*61046927SAndroid Build Coastguard Worker #if DETECT_OS_POSIX
76*61046927SAndroid Build Coastguard Worker #include <unistd.h>
77*61046927SAndroid Build Coastguard Worker #endif
78*61046927SAndroid Build Coastguard Worker 
79*61046927SAndroid Build Coastguard Worker #if defined(HAS_ANDROID_CPUFEATURES)
80*61046927SAndroid Build Coastguard Worker #include <cpu-features.h>
81*61046927SAndroid Build Coastguard Worker #endif
82*61046927SAndroid Build Coastguard Worker 
83*61046927SAndroid Build Coastguard Worker #if DETECT_OS_WINDOWS
84*61046927SAndroid Build Coastguard Worker #include <windows.h>
85*61046927SAndroid Build Coastguard Worker #if DETECT_CC_MSVC
86*61046927SAndroid Build Coastguard Worker #include <intrin.h>
87*61046927SAndroid Build Coastguard Worker #endif
88*61046927SAndroid Build Coastguard Worker #endif
89*61046927SAndroid Build Coastguard Worker 
90*61046927SAndroid Build Coastguard Worker #if defined(HAS_SCHED_H)
91*61046927SAndroid Build Coastguard Worker #include <sched.h>
92*61046927SAndroid Build Coastguard Worker #endif
93*61046927SAndroid Build Coastguard Worker 
// prevent inadvertent infinite recursion
95*61046927SAndroid Build Coastguard Worker #define util_get_cpu_caps() util_get_cpu_caps_DO_NOT_USE()
96*61046927SAndroid Build Coastguard Worker 
97*61046927SAndroid Build Coastguard Worker DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false)
98*61046927SAndroid Build Coastguard Worker 
99*61046927SAndroid Build Coastguard Worker static
100*61046927SAndroid Build Coastguard Worker struct util_cpu_caps_t util_cpu_caps;
101*61046927SAndroid Build Coastguard Worker 
102*61046927SAndroid Build Coastguard Worker /* Do not try to access _util_cpu_caps_state directly, call to util_get_cpu_caps instead */
103*61046927SAndroid Build Coastguard Worker struct _util_cpu_caps_state_t _util_cpu_caps_state = {
104*61046927SAndroid Build Coastguard Worker    .once_flag = ONCE_FLAG_INIT,
105*61046927SAndroid Build Coastguard Worker    .detect_done = 0,
106*61046927SAndroid Build Coastguard Worker };
107*61046927SAndroid Build Coastguard Worker 
108*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
109*61046927SAndroid Build Coastguard Worker static int has_cpuid(void);
110*61046927SAndroid Build Coastguard Worker #endif
111*61046927SAndroid Build Coastguard Worker 
112*61046927SAndroid Build Coastguard Worker 
113*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_PPC && !DETECT_OS_APPLE && !DETECT_OS_BSD && !DETECT_OS_LINUX
114*61046927SAndroid Build Coastguard Worker static jmp_buf  __lv_powerpc_jmpbuf;
115*61046927SAndroid Build Coastguard Worker static volatile sig_atomic_t __lv_powerpc_canjump = 0;
116*61046927SAndroid Build Coastguard Worker 
117*61046927SAndroid Build Coastguard Worker static void
sigill_handler(int sig)118*61046927SAndroid Build Coastguard Worker sigill_handler(int sig)
119*61046927SAndroid Build Coastguard Worker {
120*61046927SAndroid Build Coastguard Worker    if (!__lv_powerpc_canjump) {
121*61046927SAndroid Build Coastguard Worker       signal (sig, SIG_DFL);
122*61046927SAndroid Build Coastguard Worker       raise (sig);
123*61046927SAndroid Build Coastguard Worker    }
124*61046927SAndroid Build Coastguard Worker 
125*61046927SAndroid Build Coastguard Worker    __lv_powerpc_canjump = 0;
126*61046927SAndroid Build Coastguard Worker    longjmp(__lv_powerpc_jmpbuf, 1);
127*61046927SAndroid Build Coastguard Worker }
128*61046927SAndroid Build Coastguard Worker #endif
129*61046927SAndroid Build Coastguard Worker 
130*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_PPC
131*61046927SAndroid Build Coastguard Worker static void
check_os_altivec_support(void)132*61046927SAndroid Build Coastguard Worker check_os_altivec_support(void)
133*61046927SAndroid Build Coastguard Worker {
134*61046927SAndroid Build Coastguard Worker #if defined(__ALTIVEC__)
135*61046927SAndroid Build Coastguard Worker    util_cpu_caps.has_altivec = 1;
136*61046927SAndroid Build Coastguard Worker #endif
137*61046927SAndroid Build Coastguard Worker #if defined(__VSX__)
138*61046927SAndroid Build Coastguard Worker    util_cpu_caps.has_vsx = 1;
139*61046927SAndroid Build Coastguard Worker #endif
140*61046927SAndroid Build Coastguard Worker #if defined(__ALTIVEC__) && defined(__VSX__)
141*61046927SAndroid Build Coastguard Worker /* Do nothing */
142*61046927SAndroid Build Coastguard Worker #elif DETECT_OS_APPLE || DETECT_OS_NETBSD || DETECT_OS_OPENBSD
143*61046927SAndroid Build Coastguard Worker #ifdef HW_VECTORUNIT
144*61046927SAndroid Build Coastguard Worker    int sels[2] = {CTL_HW, HW_VECTORUNIT};
145*61046927SAndroid Build Coastguard Worker #else
146*61046927SAndroid Build Coastguard Worker    int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC};
147*61046927SAndroid Build Coastguard Worker #endif
148*61046927SAndroid Build Coastguard Worker    int has_vu = 0;
149*61046927SAndroid Build Coastguard Worker    size_t len = sizeof (has_vu);
150*61046927SAndroid Build Coastguard Worker    int err;
151*61046927SAndroid Build Coastguard Worker 
152*61046927SAndroid Build Coastguard Worker    err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
153*61046927SAndroid Build Coastguard Worker 
154*61046927SAndroid Build Coastguard Worker    if (err == 0) {
155*61046927SAndroid Build Coastguard Worker       if (has_vu != 0) {
156*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_altivec = 1;
157*61046927SAndroid Build Coastguard Worker       }
158*61046927SAndroid Build Coastguard Worker    }
159*61046927SAndroid Build Coastguard Worker #elif DETECT_OS_FREEBSD /* !DETECT_OS_APPLE && !DETECT_OS_NETBSD && !DETECT_OS_OPENBSD */
160*61046927SAndroid Build Coastguard Worker    unsigned long hwcap = 0;
161*61046927SAndroid Build Coastguard Worker #ifdef HAVE_ELF_AUX_INFO
162*61046927SAndroid Build Coastguard Worker    elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
163*61046927SAndroid Build Coastguard Worker #else
164*61046927SAndroid Build Coastguard Worker    size_t len = sizeof(hwcap);
165*61046927SAndroid Build Coastguard Worker    sysctlbyname("hw.cpu_features", &hwcap, &len, NULL, 0);
166*61046927SAndroid Build Coastguard Worker #endif
167*61046927SAndroid Build Coastguard Worker    if (hwcap & PPC_FEATURE_HAS_ALTIVEC)
168*61046927SAndroid Build Coastguard Worker       util_cpu_caps.has_altivec = 1;
169*61046927SAndroid Build Coastguard Worker    if (hwcap & PPC_FEATURE_HAS_VSX)
170*61046927SAndroid Build Coastguard Worker       util_cpu_caps.has_vsx = 1;
171*61046927SAndroid Build Coastguard Worker #elif DETECT_OS_LINUX /* !DETECT_OS_FREEBSD */
172*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_PPC_64
173*61046927SAndroid Build Coastguard Worker     Elf64_auxv_t aux;
174*61046927SAndroid Build Coastguard Worker #else
175*61046927SAndroid Build Coastguard Worker     Elf32_auxv_t aux;
176*61046927SAndroid Build Coastguard Worker #endif
177*61046927SAndroid Build Coastguard Worker     int fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);
178*61046927SAndroid Build Coastguard Worker     if (fd >= 0) {
179*61046927SAndroid Build Coastguard Worker        while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) {
180*61046927SAndroid Build Coastguard Worker           if (aux.a_type == AT_HWCAP) {
181*61046927SAndroid Build Coastguard Worker              char *env_vsx = getenv("GALLIVM_VSX");
182*61046927SAndroid Build Coastguard Worker              uint64_t hwcap = aux.a_un.a_val;
183*61046927SAndroid Build Coastguard Worker              util_cpu_caps.has_altivec = (hwcap >> 28) & 1;
184*61046927SAndroid Build Coastguard Worker              if (!env_vsx || env_vsx[0] != '0') {
185*61046927SAndroid Build Coastguard Worker                 util_cpu_caps.has_vsx  = (hwcap >>  7) & 1;
186*61046927SAndroid Build Coastguard Worker              }
187*61046927SAndroid Build Coastguard Worker              break;
188*61046927SAndroid Build Coastguard Worker           }
189*61046927SAndroid Build Coastguard Worker        }
190*61046927SAndroid Build Coastguard Worker        close(fd);
191*61046927SAndroid Build Coastguard Worker     }
192*61046927SAndroid Build Coastguard Worker #else /* !DETECT_OS_APPLE && !DETECT_OS_BSD && !DETECT_OS_LINUX */
193*61046927SAndroid Build Coastguard Worker    /* not on Apple/Darwin or Linux, do it the brute-force way */
194*61046927SAndroid Build Coastguard Worker    /* this is borrowed from the libmpeg2 library */
195*61046927SAndroid Build Coastguard Worker    signal(SIGILL, sigill_handler);
196*61046927SAndroid Build Coastguard Worker    if (setjmp(__lv_powerpc_jmpbuf)) {
197*61046927SAndroid Build Coastguard Worker       signal(SIGILL, SIG_DFL);
198*61046927SAndroid Build Coastguard Worker    } else {
199*61046927SAndroid Build Coastguard Worker       bool enable_altivec = true;    /* Default: enable  if available, and if not overridden */
200*61046927SAndroid Build Coastguard Worker       bool enable_vsx = true;
201*61046927SAndroid Build Coastguard Worker #if MESA_DEBUG
202*61046927SAndroid Build Coastguard Worker       /* Disabling Altivec code generation is not the same as disabling VSX code generation,
203*61046927SAndroid Build Coastguard Worker        * which can be done simply by passing -mattr=-vsx to the LLVM compiler; cf.
204*61046927SAndroid Build Coastguard Worker        * lp_build_create_jit_compiler_for_module().
205*61046927SAndroid Build Coastguard Worker        * If you want to disable Altivec code generation, the best place to do it is here.
206*61046927SAndroid Build Coastguard Worker        */
207*61046927SAndroid Build Coastguard Worker       char *env_control = getenv("GALLIVM_ALTIVEC");    /* 1=enable (default); 0=disable */
208*61046927SAndroid Build Coastguard Worker       if (env_control && env_control[0] == '0') {
209*61046927SAndroid Build Coastguard Worker          enable_altivec = false;
210*61046927SAndroid Build Coastguard Worker       }
211*61046927SAndroid Build Coastguard Worker #endif
212*61046927SAndroid Build Coastguard Worker       /* VSX instructions can be explicitly enabled/disabled via GALLIVM_VSX=1 or 0 */
213*61046927SAndroid Build Coastguard Worker       char *env_vsx = getenv("GALLIVM_VSX");
214*61046927SAndroid Build Coastguard Worker       if (env_vsx && env_vsx[0] == '0') {
215*61046927SAndroid Build Coastguard Worker          enable_vsx = false;
216*61046927SAndroid Build Coastguard Worker       }
217*61046927SAndroid Build Coastguard Worker       if (enable_altivec) {
218*61046927SAndroid Build Coastguard Worker          __lv_powerpc_canjump = 1;
219*61046927SAndroid Build Coastguard Worker 
220*61046927SAndroid Build Coastguard Worker          __asm __volatile
221*61046927SAndroid Build Coastguard Worker             ("mtspr 256, %0\n\t"
222*61046927SAndroid Build Coastguard Worker              "vand %%v0, %%v0, %%v0"
223*61046927SAndroid Build Coastguard Worker              :
224*61046927SAndroid Build Coastguard Worker              : "r" (-1));
225*61046927SAndroid Build Coastguard Worker 
226*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_altivec = 1;
227*61046927SAndroid Build Coastguard Worker 
228*61046927SAndroid Build Coastguard Worker          if (enable_vsx) {
229*61046927SAndroid Build Coastguard Worker             __asm __volatile("xxland %vs0, %vs0, %vs0");
230*61046927SAndroid Build Coastguard Worker             util_cpu_caps.has_vsx = 1;
231*61046927SAndroid Build Coastguard Worker          }
232*61046927SAndroid Build Coastguard Worker          signal(SIGILL, SIG_DFL);
233*61046927SAndroid Build Coastguard Worker       } else {
234*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_altivec = 0;
235*61046927SAndroid Build Coastguard Worker       }
236*61046927SAndroid Build Coastguard Worker    }
237*61046927SAndroid Build Coastguard Worker #endif /* !DETECT_OS_APPLE && !DETECT_OS_LINUX */
238*61046927SAndroid Build Coastguard Worker }
239*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_PPC */
240*61046927SAndroid Build Coastguard Worker 
241*61046927SAndroid Build Coastguard Worker 
242*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
/**
 * Report whether the CPUID instruction is available.
 *
 * On 32-bit x86 with a GCC-compatible compiler this toggles the ID bit
 * (bit 21, mask 0x200000) of EFLAGS and checks whether the change sticks.
 * On x86-64 the answer is always yes.
 *
 * @return non-zero if CPUID can be executed, 0 otherwise.
 */
static int has_cpuid(void)
{
#if DETECT_ARCH_X86
   /* The guard must be the compiler macro DETECT_CC_GCC (as used by cpuid()
    * and xgetbv() in this file); the former DETECT_OS_GCC never matched, so
    * the probe below was dead code.
    */
#if DETECT_CC_GCC
   int a, c;

   __asm __volatile
      ("pushf\n"
       "popl %0\n"
       "movl %0, %1\n"
       "xorl $0x200000, %0\n"
       "push %0\n"
       "popf\n"
       "pushf\n"
       "popl %0\n"
       : "=a" (a), "=c" (c)
       :
       : "cc");

   /* a: EFLAGS after the toggle attempt, c: EFLAGS before it. */
   return a != c;
#else
   /* FIXME */
   return 1;
#endif
#elif DETECT_ARCH_X86_64
   return 1;
#else
   return 0;
#endif
}
273*61046927SAndroid Build Coastguard Worker 
274*61046927SAndroid Build Coastguard Worker 
/**
 * Execute CPUID for leaf @p ax and store EAX, EBX, ECX, EDX in
 * p[0], p[1], p[2], p[3] respectively.
 *
 * When neither a GCC-style nor an MSVC implementation is available, all
 * four outputs are set to zero.
 *
 * @sa cpuid.h included in gcc-4.3 onwards.
 * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
 */
static inline void
cpuid(uint32_t ax, uint32_t *p)
{
#if DETECT_CC_GCC && DETECT_ARCH_X86
   /* EBX is swapped through ESI around the instruction so the compiler
    * never sees EBX being modified.
    */
   __asm __volatile (
     "xchgl %%ebx, %1\n\t"
     "cpuid\n\t"
     "xchgl %%ebx, %1"
     : "=a" (p[0]),
       "=S" (p[1]),
       "=c" (p[2]),
       "=d" (p[3])
     : "0" (ax)
   );
#elif DETECT_CC_GCC && DETECT_ARCH_X86_64
   __asm __volatile (
     "cpuid\n\t"
     : "=a" (p[0]),
       "=b" (p[1]),
       "=c" (p[2]),
       "=d" (p[3])
     : "0" (ax)
   );
#elif DETECT_CC_MSVC
   __cpuid(p, ax);
#else
   /* No way to run CPUID: report an all-zero result. */
   for (unsigned reg = 0; reg < 4; reg++) {
      p[reg] = 0;
   }
#endif
}
311*61046927SAndroid Build Coastguard Worker 
/**
 * Execute CPUID for leaf @p ax and sub-leaf @p cx and store EAX, EBX, ECX,
 * EDX in p[0], p[1], p[2], p[3] respectively.
 *
 * When neither a GCC-style nor an MSVC implementation is available, all
 * four outputs are set to zero.
 *
 * @sa cpuid.h included in gcc-4.4 onwards.
 * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
 */
static inline void
cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
{
#if DETECT_CC_GCC && DETECT_ARCH_X86
   /* EBX is swapped through ESI around the instruction so the compiler
    * never sees EBX being modified.
    */
   __asm __volatile (
     "xchgl %%ebx, %1\n\t"
     "cpuid\n\t"
     "xchgl %%ebx, %1"
     : "=a" (p[0]),
       "=S" (p[1]),
       "=c" (p[2]),
       "=d" (p[3])
     : "0" (ax), "2" (cx)
   );
#elif DETECT_CC_GCC && DETECT_ARCH_X86_64
   __asm __volatile (
     "cpuid\n\t"
     : "=a" (p[0]),
       "=b" (p[1]),
       "=c" (p[2]),
       "=d" (p[3])
     : "0" (ax), "2" (cx)
   );
#elif DETECT_CC_MSVC
   __cpuidex(p, ax, cx);
#else
   /* No way to run CPUID: report an all-zero result. */
   for (unsigned reg = 0; reg < 4; reg++) {
      p[reg] = 0;
   }
#endif
}
348*61046927SAndroid Build Coastguard Worker 
349*61046927SAndroid Build Coastguard Worker 
/**
 * Read extended control register 0 with XGETBV.
 *
 * @return the 64-bit register value (EDX:EAX), or 0 when no implementation
 *         is available for the current compiler.
 */
static inline uint64_t xgetbv(void)
{
#if DETECT_CC_GCC
   uint32_t lo, hi;

   /* ECX = 0 selects the register to read. */
   __asm __volatile (
     ".byte 0x0f, 0x01, 0xd0" // xgetbv isn't supported on gcc < 4.4
     : "=a"(lo),
       "=d"(hi)
     : "c"(0)
   );

   uint64_t value = (uint64_t)hi << 32;
   value |= lo;
   return value;
#elif DETECT_CC_MSVC && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
   return _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#else
   return 0;
#endif
}
369*61046927SAndroid Build Coastguard Worker 
370*61046927SAndroid Build Coastguard Worker 
371*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_X86
372*61046927SAndroid Build Coastguard Worker UTIL_ALIGN_STACK
373*61046927SAndroid Build Coastguard Worker static inline bool
sse2_has_daz(void)374*61046927SAndroid Build Coastguard Worker sse2_has_daz(void)
375*61046927SAndroid Build Coastguard Worker {
376*61046927SAndroid Build Coastguard Worker    alignas(16) struct {
377*61046927SAndroid Build Coastguard Worker       uint32_t pad1[7];
378*61046927SAndroid Build Coastguard Worker       uint32_t mxcsr_mask;
379*61046927SAndroid Build Coastguard Worker       uint32_t pad2[128-8];
380*61046927SAndroid Build Coastguard Worker    } fxarea;
381*61046927SAndroid Build Coastguard Worker 
382*61046927SAndroid Build Coastguard Worker    fxarea.mxcsr_mask = 0;
383*61046927SAndroid Build Coastguard Worker #if DETECT_CC_GCC
384*61046927SAndroid Build Coastguard Worker    __asm __volatile ("fxsave %0" : "+m" (fxarea));
385*61046927SAndroid Build Coastguard Worker #elif DETECT_CC_MSVC || DETECT_CC_ICL
386*61046927SAndroid Build Coastguard Worker    _fxsave(&fxarea);
387*61046927SAndroid Build Coastguard Worker #else
388*61046927SAndroid Build Coastguard Worker    fxarea.mxcsr_mask = 0;
389*61046927SAndroid Build Coastguard Worker #endif
390*61046927SAndroid Build Coastguard Worker    return !!(fxarea.mxcsr_mask & (1 << 6));
391*61046927SAndroid Build Coastguard Worker }
392*61046927SAndroid Build Coastguard Worker #endif
393*61046927SAndroid Build Coastguard Worker 
394*61046927SAndroid Build Coastguard Worker #endif /* X86 or X86_64 */
395*61046927SAndroid Build Coastguard Worker 
396*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_ARM
/**
 * Detect NEON on 32-bit ARM and record it in util_cpu_caps.has_neon.
 *
 * Sources, in order of preference: compiler-defined __ARM_NEON,
 * elf_aux_info() on FreeBSD, the Android cpufeatures library, and finally
 * AT_HWCAP from /proc/self/auxv on Linux.
 */
static void
check_os_arm_support(void)
{
   /*
    * The cpufeatures library is the preferred way to query CPU capabilities
    * on Android, but it is unavailable in standalone Mesa builds (those not
    * driven by the Android.mk build system).  DETECT_OS_ANDROID therefore
    * cannot be used here; HAS_ANDROID_CPUFEATURES is a dedicated macro that
    * only the respective Android.mk enables.
    */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
   util_cpu_caps.has_neon = 1;
#elif DETECT_OS_FREEBSD && defined(HAVE_ELF_AUX_INFO)
   unsigned long hwcap = 0;
   elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
   if (hwcap & HWCAP_NEON)
      util_cpu_caps.has_neon = 1;
#elif defined(HAS_ANDROID_CPUFEATURES)
   AndroidCpuFamily cpu_family = android_getCpuFamily();
   uint64_t cpu_features = android_getCpuFeatures();

   if (cpu_family == ANDROID_CPU_FAMILY_ARM &&
       (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)) {
      util_cpu_caps.has_neon = 1;
   }
#elif DETECT_OS_LINUX
   Elf32_auxv_t entry;
   int auxv_fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

   if (auxv_fd < 0)
      return;

   /* Scan the auxiliary vector entry by entry until AT_HWCAP shows up. */
   while (read(auxv_fd, &entry, sizeof(entry)) == sizeof(entry)) {
      if (entry.a_type != AT_HWCAP)
         continue;

      uint32_t hwcap = entry.a_un.a_val;

      /* Bit 12 of AT_HWCAP is the NEON flag. */
      util_cpu_caps.has_neon = (hwcap >> 12) & 1;
      break;
   }
   close(auxv_fd);
#endif /* DETECT_OS_LINUX */
}
440*61046927SAndroid Build Coastguard Worker 
441*61046927SAndroid Build Coastguard Worker #elif DETECT_ARCH_AARCH64
442*61046927SAndroid Build Coastguard Worker static void
check_os_arm_support(void)443*61046927SAndroid Build Coastguard Worker check_os_arm_support(void)
444*61046927SAndroid Build Coastguard Worker {
445*61046927SAndroid Build Coastguard Worker     util_cpu_caps.has_neon = true;
446*61046927SAndroid Build Coastguard Worker }
447*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_ARM || DETECT_ARCH_AARCH64 */
448*61046927SAndroid Build Coastguard Worker 
449*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_MIPS64
/**
 * Detect MSA on MIPS64 and record it in util_cpu_caps.has_msa.
 *
 * Only Linux is handled: AT_HWCAP is read from /proc/self/auxv.  On other
 * systems the function does nothing.
 */
static void
check_os_mips64_support(void)
{
#if DETECT_OS_LINUX
   Elf64_auxv_t entry;
   int auxv_fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

   if (auxv_fd < 0)
      return;

   /* Scan the auxiliary vector entry by entry until AT_HWCAP shows up. */
   while (read(auxv_fd, &entry, sizeof(entry)) == sizeof(entry)) {
      if (entry.a_type != AT_HWCAP)
         continue;

      uint64_t hwcap = entry.a_un.a_val;

      /* Bit 1 of AT_HWCAP is the MSA flag. */
      util_cpu_caps.has_msa = (hwcap >> 1) & 1;
      break;
   }
   close(auxv_fd);
#endif /* DETECT_OS_LINUX */
}
471*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_MIPS64 */
472*61046927SAndroid Build Coastguard Worker 
473*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_LOONGARCH64
/**
 * Probe LoongArch64 SIMD support.
 *
 * On Linux, walks the entries of /proc/self/auxv looking for AT_HWCAP and
 * copies bit 4 into util_cpu_caps.has_lsx and bit 5 into
 * util_cpu_caps.has_lasx.  If the file cannot be opened, or no AT_HWCAP entry
 * is found, both flags are left untouched.  On other operating systems this
 * function does nothing.
 */
static void
check_os_loongarch64_support(void)
{
#if DETECT_OS_LINUX
    const int auxv_fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);
    if (auxv_fd < 0)
       return;

    Elf64_auxv_t entry;
    for (;;) {
       /* Stop on EOF, error, or a truncated record. */
       if (read(auxv_fd, &entry, sizeof(Elf64_auxv_t)) != sizeof(Elf64_auxv_t))
          break;

       if (entry.a_type != AT_HWCAP)
          continue;

       const uint64_t hwcap = entry.a_un.a_val;
       util_cpu_caps.has_lsx = (hwcap >> 4) & 1;
       util_cpu_caps.has_lasx = (hwcap >> 5) & 1;
       break;
    }
    close(auxv_fd);
#endif /* DETECT_OS_LINUX */
}
496*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_LOONGARCH64 */
497*61046927SAndroid Build Coastguard Worker 
498*61046927SAndroid Build Coastguard Worker 
499*61046927SAndroid Build Coastguard Worker static void
get_cpu_topology(void)500*61046927SAndroid Build Coastguard Worker get_cpu_topology(void)
501*61046927SAndroid Build Coastguard Worker {
502*61046927SAndroid Build Coastguard Worker    /* Default. This is OK if L3 is not present or there is only one. */
503*61046927SAndroid Build Coastguard Worker    util_cpu_caps.num_L3_caches = 1;
504*61046927SAndroid Build Coastguard Worker 
505*61046927SAndroid Build Coastguard Worker    memset(util_cpu_caps.cpu_to_L3, 0xff, sizeof(util_cpu_caps.cpu_to_L3));
506*61046927SAndroid Build Coastguard Worker 
507*61046927SAndroid Build Coastguard Worker #if DETECT_OS_LINUX
508*61046927SAndroid Build Coastguard Worker    uint64_t big_cap = 0;
509*61046927SAndroid Build Coastguard Worker    unsigned num_big_cpus = 0;
510*61046927SAndroid Build Coastguard Worker    uint64_t *caps = malloc(sizeof(uint64_t) * util_cpu_caps.max_cpus);
511*61046927SAndroid Build Coastguard Worker    bool fail = false;
512*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; caps && i < util_cpu_caps.max_cpus; i++) {
513*61046927SAndroid Build Coastguard Worker       char name[PATH_MAX];
514*61046927SAndroid Build Coastguard Worker       snprintf(name, sizeof(name), "/sys/devices/system/cpu/cpu%u/cpu_capacity", i);
515*61046927SAndroid Build Coastguard Worker       size_t size = 0;
516*61046927SAndroid Build Coastguard Worker       char *cap = os_read_file(name, &size);
517*61046927SAndroid Build Coastguard Worker       if (!cap) {
518*61046927SAndroid Build Coastguard Worker          num_big_cpus = 0;
519*61046927SAndroid Build Coastguard Worker          fail = true;
520*61046927SAndroid Build Coastguard Worker          break;
521*61046927SAndroid Build Coastguard Worker       }
522*61046927SAndroid Build Coastguard Worker       errno = 0;
523*61046927SAndroid Build Coastguard Worker       caps[i] = strtoull(cap, NULL, 10);
524*61046927SAndroid Build Coastguard Worker       free(cap);
525*61046927SAndroid Build Coastguard Worker       if (errno) {
526*61046927SAndroid Build Coastguard Worker          fail = true;
527*61046927SAndroid Build Coastguard Worker          break;
528*61046927SAndroid Build Coastguard Worker       }
529*61046927SAndroid Build Coastguard Worker       big_cap = MAX2(caps[i], big_cap);
530*61046927SAndroid Build Coastguard Worker    }
531*61046927SAndroid Build Coastguard Worker    if (!fail) {
532*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; caps && i < util_cpu_caps.max_cpus; i++) {
533*61046927SAndroid Build Coastguard Worker          if (caps[i] >= big_cap / 2)
534*61046927SAndroid Build Coastguard Worker             num_big_cpus++;
535*61046927SAndroid Build Coastguard Worker       }
536*61046927SAndroid Build Coastguard Worker    }
537*61046927SAndroid Build Coastguard Worker    free(caps);
538*61046927SAndroid Build Coastguard Worker    util_cpu_caps.nr_big_cpus = num_big_cpus;
539*61046927SAndroid Build Coastguard Worker #endif
540*61046927SAndroid Build Coastguard Worker 
541*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
542*61046927SAndroid Build Coastguard Worker    /* AMD Zen */
543*61046927SAndroid Build Coastguard Worker    if (util_cpu_caps.family >= CPU_AMD_ZEN1_ZEN2 &&
544*61046927SAndroid Build Coastguard Worker        util_cpu_caps.family < CPU_AMD_LAST) {
545*61046927SAndroid Build Coastguard Worker       uint32_t regs[4];
546*61046927SAndroid Build Coastguard Worker 
547*61046927SAndroid Build Coastguard Worker       uint32_t saved_mask[UTIL_MAX_CPUS / 32] = {0};
548*61046927SAndroid Build Coastguard Worker       uint32_t mask[UTIL_MAX_CPUS / 32] = {0};
549*61046927SAndroid Build Coastguard Worker       bool saved = false;
550*61046927SAndroid Build Coastguard Worker 
551*61046927SAndroid Build Coastguard Worker       uint32_t L3_found[UTIL_MAX_CPUS] = {0};
552*61046927SAndroid Build Coastguard Worker       uint32_t num_L3_caches = 0;
553*61046927SAndroid Build Coastguard Worker       util_affinity_mask *L3_affinity_masks = NULL;
554*61046927SAndroid Build Coastguard Worker 
555*61046927SAndroid Build Coastguard Worker       /* Query APIC IDs from each CPU core.
556*61046927SAndroid Build Coastguard Worker        *
557*61046927SAndroid Build Coastguard Worker        * An APIC ID is a logical ID of the CPU with respect to the cache
558*61046927SAndroid Build Coastguard Worker        * hierarchy, meaning that consecutive APIC IDs are neighbours in
559*61046927SAndroid Build Coastguard Worker        * the hierarchy, e.g. sharing the same cache.
560*61046927SAndroid Build Coastguard Worker        *
561*61046927SAndroid Build Coastguard Worker        * For example, CPU 0 can have APIC ID 0 and CPU 12 can have APIC ID 1,
562*61046927SAndroid Build Coastguard Worker        * which means that both CPU 0 and 12 are next to each other.
563*61046927SAndroid Build Coastguard Worker        * (e.g. they are 2 threads belonging to 1 SMT2 core)
564*61046927SAndroid Build Coastguard Worker        *
565*61046927SAndroid Build Coastguard Worker        * We need to find out which CPUs share the same L3 cache and they can
566*61046927SAndroid Build Coastguard Worker        * be all over the place.
567*61046927SAndroid Build Coastguard Worker        *
568*61046927SAndroid Build Coastguard Worker        * Querying the APIC ID can only be done by pinning the current thread
569*61046927SAndroid Build Coastguard Worker        * to each core. The original affinity mask is saved.
570*61046927SAndroid Build Coastguard Worker        *
571*61046927SAndroid Build Coastguard Worker        * Loop over all possible CPUs even though some may be offline.
572*61046927SAndroid Build Coastguard Worker        */
573*61046927SAndroid Build Coastguard Worker       for (int16_t i = 0; i < util_cpu_caps.max_cpus && i < UTIL_MAX_CPUS; i++) {
574*61046927SAndroid Build Coastguard Worker          uint32_t cpu_bit = 1u << (i % 32);
575*61046927SAndroid Build Coastguard Worker 
576*61046927SAndroid Build Coastguard Worker          mask[i / 32] = cpu_bit;
577*61046927SAndroid Build Coastguard Worker 
578*61046927SAndroid Build Coastguard Worker          /* The assumption is that trying to bind the thread to a CPU that is
579*61046927SAndroid Build Coastguard Worker           * offline will fail.
580*61046927SAndroid Build Coastguard Worker           */
581*61046927SAndroid Build Coastguard Worker          if (util_set_current_thread_affinity(mask,
582*61046927SAndroid Build Coastguard Worker                                               !saved ? saved_mask : NULL,
583*61046927SAndroid Build Coastguard Worker                                               util_cpu_caps.num_cpu_mask_bits)) {
584*61046927SAndroid Build Coastguard Worker             saved = true;
585*61046927SAndroid Build Coastguard Worker 
586*61046927SAndroid Build Coastguard Worker             /* Query the APIC ID of the current core. */
587*61046927SAndroid Build Coastguard Worker             cpuid(0x00000001, regs);
588*61046927SAndroid Build Coastguard Worker             unsigned apic_id = regs[1] >> 24;
589*61046927SAndroid Build Coastguard Worker 
590*61046927SAndroid Build Coastguard Worker             /* Query the total core count for the CPU */
591*61046927SAndroid Build Coastguard Worker             uint32_t core_count = 1;
592*61046927SAndroid Build Coastguard Worker             if (regs[3] & (1 << 28))
593*61046927SAndroid Build Coastguard Worker                core_count = (regs[1] >> 16) & 0xff;
594*61046927SAndroid Build Coastguard Worker 
595*61046927SAndroid Build Coastguard Worker             core_count = util_next_power_of_two(core_count);
596*61046927SAndroid Build Coastguard Worker 
597*61046927SAndroid Build Coastguard Worker             /* Query the L3 cache count. */
598*61046927SAndroid Build Coastguard Worker             cpuid_count(0x8000001D, 3, regs);
599*61046927SAndroid Build Coastguard Worker             unsigned cache_level = (regs[0] >> 5) & 0x7;
600*61046927SAndroid Build Coastguard Worker             unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1;
601*61046927SAndroid Build Coastguard Worker 
602*61046927SAndroid Build Coastguard Worker             if (cache_level != 3)
603*61046927SAndroid Build Coastguard Worker                continue;
604*61046927SAndroid Build Coastguard Worker 
605*61046927SAndroid Build Coastguard Worker             unsigned local_core_id = apic_id & (core_count - 1);
606*61046927SAndroid Build Coastguard Worker             unsigned phys_id = (apic_id & ~(core_count - 1)) >> util_logbase2(core_count);
607*61046927SAndroid Build Coastguard Worker             unsigned local_l3_cache_index = local_core_id / util_next_power_of_two(cores_per_L3);
608*61046927SAndroid Build Coastguard Worker #define L3_ID(p, i) (p << 16 | i << 1 | 1);
609*61046927SAndroid Build Coastguard Worker 
610*61046927SAndroid Build Coastguard Worker             unsigned l3_id = L3_ID(phys_id, local_l3_cache_index);
611*61046927SAndroid Build Coastguard Worker             int idx = -1;
612*61046927SAndroid Build Coastguard Worker             for (unsigned c = 0; c < num_L3_caches; c++) {
613*61046927SAndroid Build Coastguard Worker                if (L3_found[c] == l3_id) {
614*61046927SAndroid Build Coastguard Worker                   idx = c;
615*61046927SAndroid Build Coastguard Worker                   break;
616*61046927SAndroid Build Coastguard Worker                }
617*61046927SAndroid Build Coastguard Worker             }
618*61046927SAndroid Build Coastguard Worker             if (idx == -1) {
619*61046927SAndroid Build Coastguard Worker                idx = num_L3_caches;
620*61046927SAndroid Build Coastguard Worker                L3_found[num_L3_caches++] = l3_id;
621*61046927SAndroid Build Coastguard Worker                L3_affinity_masks = realloc(L3_affinity_masks, sizeof(util_affinity_mask) * num_L3_caches);
622*61046927SAndroid Build Coastguard Worker                if (!L3_affinity_masks)
623*61046927SAndroid Build Coastguard Worker                   return;
624*61046927SAndroid Build Coastguard Worker                memset(&L3_affinity_masks[num_L3_caches - 1], 0, sizeof(util_affinity_mask));
625*61046927SAndroid Build Coastguard Worker             }
626*61046927SAndroid Build Coastguard Worker             util_cpu_caps.cpu_to_L3[i] = idx;
627*61046927SAndroid Build Coastguard Worker             L3_affinity_masks[idx][i / 32] |= cpu_bit;
628*61046927SAndroid Build Coastguard Worker 
629*61046927SAndroid Build Coastguard Worker          }
630*61046927SAndroid Build Coastguard Worker          mask[i / 32] = 0;
631*61046927SAndroid Build Coastguard Worker       }
632*61046927SAndroid Build Coastguard Worker 
633*61046927SAndroid Build Coastguard Worker       util_cpu_caps.num_L3_caches = num_L3_caches;
634*61046927SAndroid Build Coastguard Worker       util_cpu_caps.L3_affinity_mask = L3_affinity_masks;
635*61046927SAndroid Build Coastguard Worker 
636*61046927SAndroid Build Coastguard Worker       if (saved) {
637*61046927SAndroid Build Coastguard Worker          if (debug_get_option_dump_cpu()) {
638*61046927SAndroid Build Coastguard Worker             fprintf(stderr, "CPU <-> L3 cache mapping:\n");
639*61046927SAndroid Build Coastguard Worker             for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) {
640*61046927SAndroid Build Coastguard Worker                fprintf(stderr, "  - L3 %u mask = ", i);
641*61046927SAndroid Build Coastguard Worker                for (int j = util_cpu_caps.max_cpus - 1; j >= 0; j -= 32)
642*61046927SAndroid Build Coastguard Worker                   fprintf(stderr, "%08x ", util_cpu_caps.L3_affinity_mask[i][j / 32]);
643*61046927SAndroid Build Coastguard Worker                fprintf(stderr, "\n");
644*61046927SAndroid Build Coastguard Worker             }
645*61046927SAndroid Build Coastguard Worker          }
646*61046927SAndroid Build Coastguard Worker 
647*61046927SAndroid Build Coastguard Worker          /* Restore the original affinity mask. */
648*61046927SAndroid Build Coastguard Worker          util_set_current_thread_affinity(saved_mask, NULL,
649*61046927SAndroid Build Coastguard Worker                                           util_cpu_caps.num_cpu_mask_bits);
650*61046927SAndroid Build Coastguard Worker       } else {
651*61046927SAndroid Build Coastguard Worker          if (debug_get_option_dump_cpu())
652*61046927SAndroid Build Coastguard Worker             fprintf(stderr, "Cannot set thread affinity for any thread.\n");
653*61046927SAndroid Build Coastguard Worker       }
654*61046927SAndroid Build Coastguard Worker    }
655*61046927SAndroid Build Coastguard Worker #endif
656*61046927SAndroid Build Coastguard Worker }
657*61046927SAndroid Build Coastguard Worker 
/**
 * Apply user-requested restrictions to the detected CPU capabilities.
 *
 * Reads the GALLIUM_OVERRIDE_CPU_CAPS option and, on x86 / x86-64, the
 * GALLIUM_NOSSE boolean (plus LP_FORCE_SSE2 in MESA_DEBUG builds).  Each
 * recognised GALLIUM_OVERRIDE_CPU_CAPS value names the highest SIMD level
 * to keep: the flag for the next level up is cleared here, and the cascade
 * at the end of the function then clears everything that depends on it.
 * Unrecognised values are ignored.  Flags are only ever cleared, never set.
 */
static void
check_cpu_caps_override(void)
{
   const char *requested = debug_get_option("GALLIUM_OVERRIDE_CPU_CAPS", NULL);

#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   if (debug_get_bool_option("GALLIUM_NOSSE", false)) {
      util_cpu_caps.has_sse = 0;
   }
#if MESA_DEBUG
   /* Lets developers emulate a machine that stops at SSE2. */
   if (debug_get_bool_option("LP_FORCE_SSE2", false)) {
      util_cpu_caps.has_sse3 = 0;
   }
#endif
#endif /* DETECT_ARCH_X86 || DETECT_ARCH_X86_64 */

   if (requested) {
#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
      if (strcmp(requested, "nosse") == 0) {
         util_cpu_caps.has_sse = 0;
      } else if (strcmp(requested, "sse") == 0) {
         util_cpu_caps.has_sse2 = 0;
      } else if (strcmp(requested, "sse2") == 0) {
         util_cpu_caps.has_sse3 = 0;
      } else if (strcmp(requested, "sse3") == 0) {
         util_cpu_caps.has_ssse3 = 0;
      } else if (strcmp(requested, "ssse3") == 0) {
         util_cpu_caps.has_sse4_1 = 0;
      } else if (strcmp(requested, "sse4.1") == 0) {
         util_cpu_caps.has_avx = 0;
      } else if (strcmp(requested, "avx") == 0) {
         util_cpu_caps.has_avx512f = 0;
      }
#endif /* DETECT_ARCH_X86 || DETECT_ARCH_X86_64 */
   }

#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   /* Dependency cascade: losing one level removes every level built on it.
    * The checks must stay in this order so that a cleared flag propagates
    * all the way down in a single pass.
    */
   if (!util_cpu_caps.has_sse) {
      util_cpu_caps.has_sse2 = 0;
   }

   if (!util_cpu_caps.has_sse2) {
      util_cpu_caps.has_sse3 = 0;
   }

   if (!util_cpu_caps.has_sse3) {
      util_cpu_caps.has_ssse3 = 0;
   }

   if (!util_cpu_caps.has_ssse3) {
      util_cpu_caps.has_sse4_1 = 0;
   }

   if (!util_cpu_caps.has_sse4_1) {
      util_cpu_caps.has_sse4_2 = 0;
      util_cpu_caps.has_avx = 0;
   }

   if (!util_cpu_caps.has_avx) {
      util_cpu_caps.has_avx2 = 0;
      util_cpu_caps.has_f16c = 0;
      util_cpu_caps.has_fma = 0;
      util_cpu_caps.has_avx512f = 0;
   }

   if (!util_cpu_caps.has_avx512f) {
      /* Without the AVX-512 foundation, drop every AVX-512 sub-feature. */
      util_cpu_caps.has_avx512dq   = 0;
      util_cpu_caps.has_avx512ifma = 0;
      util_cpu_caps.has_avx512pf   = 0;
      util_cpu_caps.has_avx512er   = 0;
      util_cpu_caps.has_avx512cd   = 0;
      util_cpu_caps.has_avx512bw   = 0;
      util_cpu_caps.has_avx512vl   = 0;
      util_cpu_caps.has_avx512vbmi = 0;
   }
#endif /* DETECT_ARCH_X86 || DETECT_ARCH_X86_64 */
}
730*61046927SAndroid Build Coastguard Worker 
731*61046927SAndroid Build Coastguard Worker static
check_max_vector_bits(void)732*61046927SAndroid Build Coastguard Worker void check_max_vector_bits(void)
733*61046927SAndroid Build Coastguard Worker {
734*61046927SAndroid Build Coastguard Worker    /* Leave it at 128, even when no SIMD extensions are available.
735*61046927SAndroid Build Coastguard Worker     * Really needs to be a multiple of 128 so can fit 4 floats.
736*61046927SAndroid Build Coastguard Worker     */
737*61046927SAndroid Build Coastguard Worker    util_cpu_caps.max_vector_bits = 128;
738*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
739*61046927SAndroid Build Coastguard Worker    if (util_cpu_caps.has_avx512f) {
740*61046927SAndroid Build Coastguard Worker       util_cpu_caps.max_vector_bits = 512;
741*61046927SAndroid Build Coastguard Worker    } else if (util_cpu_caps.has_avx) {
742*61046927SAndroid Build Coastguard Worker       util_cpu_caps.max_vector_bits = 256;
743*61046927SAndroid Build Coastguard Worker    }
744*61046927SAndroid Build Coastguard Worker #endif
745*61046927SAndroid Build Coastguard Worker }
746*61046927SAndroid Build Coastguard Worker 
747*61046927SAndroid Build Coastguard Worker void _util_cpu_detect_once(void);
748*61046927SAndroid Build Coastguard Worker 
749*61046927SAndroid Build Coastguard Worker void
_util_cpu_detect_once(void)750*61046927SAndroid Build Coastguard Worker _util_cpu_detect_once(void)
751*61046927SAndroid Build Coastguard Worker {
752*61046927SAndroid Build Coastguard Worker    int available_cpus = 0;
753*61046927SAndroid Build Coastguard Worker    int total_cpus = 0;
754*61046927SAndroid Build Coastguard Worker 
755*61046927SAndroid Build Coastguard Worker    memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
756*61046927SAndroid Build Coastguard Worker 
757*61046927SAndroid Build Coastguard Worker    /* Count the number of CPUs in system */
758*61046927SAndroid Build Coastguard Worker #if DETECT_OS_WINDOWS
759*61046927SAndroid Build Coastguard Worker    {
760*61046927SAndroid Build Coastguard Worker       SYSTEM_INFO system_info;
761*61046927SAndroid Build Coastguard Worker       GetSystemInfo(&system_info);
762*61046927SAndroid Build Coastguard Worker       available_cpus = MAX2(1, system_info.dwNumberOfProcessors);
763*61046927SAndroid Build Coastguard Worker    }
764*61046927SAndroid Build Coastguard Worker #elif DETECT_OS_POSIX
765*61046927SAndroid Build Coastguard Worker #  if defined(HAS_SCHED_GETAFFINITY)
766*61046927SAndroid Build Coastguard Worker    {
767*61046927SAndroid Build Coastguard Worker       /* sched_setaffinity() can be used to further restrict the number of
768*61046927SAndroid Build Coastguard Worker        * CPUs on which the process can run.  Use sched_getaffinity() to
769*61046927SAndroid Build Coastguard Worker        * determine the true number of available CPUs.
770*61046927SAndroid Build Coastguard Worker        *
771*61046927SAndroid Build Coastguard Worker        * FIXME: The Linux manual page for sched_getaffinity describes how this
772*61046927SAndroid Build Coastguard Worker        * simple implementation will fail with > 1024 CPUs, and we'll fall back
773*61046927SAndroid Build Coastguard Worker        * to the _SC_NPROCESSORS_ONLN path.  Support for > 1024 CPUs can be
774*61046927SAndroid Build Coastguard Worker        * added to this path once someone has such a system for testing.
775*61046927SAndroid Build Coastguard Worker        */
776*61046927SAndroid Build Coastguard Worker       cpu_set_t affin;
777*61046927SAndroid Build Coastguard Worker       if (sched_getaffinity(getpid(), sizeof(affin), &affin) == 0)
778*61046927SAndroid Build Coastguard Worker          available_cpus = CPU_COUNT(&affin);
779*61046927SAndroid Build Coastguard Worker    }
780*61046927SAndroid Build Coastguard Worker #  endif
781*61046927SAndroid Build Coastguard Worker 
782*61046927SAndroid Build Coastguard Worker    /* Linux, FreeBSD, DragonFly, and Mac OS X should have
783*61046927SAndroid Build Coastguard Worker     * _SC_NPROCESSORS_ONLN.  NetBSD and OpenBSD should have HW_NCPUONLINE.
784*61046927SAndroid Build Coastguard Worker     * This is what FFmpeg uses on those platforms.
785*61046927SAndroid Build Coastguard Worker     */
786*61046927SAndroid Build Coastguard Worker #  if DETECT_OS_BSD && defined(HW_NCPUONLINE)
787*61046927SAndroid Build Coastguard Worker    if (available_cpus == 0) {
788*61046927SAndroid Build Coastguard Worker       const int mib[] = { CTL_HW, HW_NCPUONLINE };
789*61046927SAndroid Build Coastguard Worker       int ncpu;
790*61046927SAndroid Build Coastguard Worker       size_t len = sizeof(ncpu);
791*61046927SAndroid Build Coastguard Worker 
792*61046927SAndroid Build Coastguard Worker       sysctl(mib, 2, &ncpu, &len, NULL, 0);
793*61046927SAndroid Build Coastguard Worker       available_cpus = ncpu;
794*61046927SAndroid Build Coastguard Worker    }
795*61046927SAndroid Build Coastguard Worker #  elif defined(_SC_NPROCESSORS_ONLN)
796*61046927SAndroid Build Coastguard Worker    if (available_cpus == 0) {
797*61046927SAndroid Build Coastguard Worker       available_cpus = sysconf(_SC_NPROCESSORS_ONLN);
798*61046927SAndroid Build Coastguard Worker       if (available_cpus == ~0)
799*61046927SAndroid Build Coastguard Worker          available_cpus = 1;
800*61046927SAndroid Build Coastguard Worker    }
801*61046927SAndroid Build Coastguard Worker #  elif DETECT_OS_BSD
802*61046927SAndroid Build Coastguard Worker    if (available_cpus == 0) {
803*61046927SAndroid Build Coastguard Worker       const int mib[] = { CTL_HW, HW_NCPU };
804*61046927SAndroid Build Coastguard Worker       int ncpu;
805*61046927SAndroid Build Coastguard Worker       int len = sizeof(ncpu);
806*61046927SAndroid Build Coastguard Worker 
807*61046927SAndroid Build Coastguard Worker       sysctl(mib, 2, &ncpu, &len, NULL, 0);
808*61046927SAndroid Build Coastguard Worker       available_cpus = ncpu;
809*61046927SAndroid Build Coastguard Worker    }
810*61046927SAndroid Build Coastguard Worker #  endif /* DETECT_OS_BSD */
811*61046927SAndroid Build Coastguard Worker 
812*61046927SAndroid Build Coastguard Worker    /* Determine the maximum number of CPUs configured in the system.  This is
813*61046927SAndroid Build Coastguard Worker     * used to properly set num_cpu_mask_bits below.  On BSDs that don't have
814*61046927SAndroid Build Coastguard Worker     * HW_NCPUONLINE, it was not clear whether HW_NCPU is the number of
815*61046927SAndroid Build Coastguard Worker     * configured or the number of online CPUs.  For that reason, prefer the
816*61046927SAndroid Build Coastguard Worker     * _SC_NPROCESSORS_CONF path on all BSDs.
817*61046927SAndroid Build Coastguard Worker     */
818*61046927SAndroid Build Coastguard Worker #  if defined(_SC_NPROCESSORS_CONF)
819*61046927SAndroid Build Coastguard Worker    total_cpus = sysconf(_SC_NPROCESSORS_CONF);
820*61046927SAndroid Build Coastguard Worker    if (total_cpus == ~0)
821*61046927SAndroid Build Coastguard Worker       total_cpus = 1;
822*61046927SAndroid Build Coastguard Worker #  elif DETECT_OS_BSD
823*61046927SAndroid Build Coastguard Worker    {
824*61046927SAndroid Build Coastguard Worker       const int mib[] = { CTL_HW, HW_NCPU };
825*61046927SAndroid Build Coastguard Worker       int ncpu;
826*61046927SAndroid Build Coastguard Worker       int len = sizeof(ncpu);
827*61046927SAndroid Build Coastguard Worker 
828*61046927SAndroid Build Coastguard Worker       sysctl(mib, 2, &ncpu, &len, NULL, 0);
829*61046927SAndroid Build Coastguard Worker       total_cpus = ncpu;
830*61046927SAndroid Build Coastguard Worker    }
831*61046927SAndroid Build Coastguard Worker #  endif /* DETECT_OS_BSD */
832*61046927SAndroid Build Coastguard Worker #endif /* DETECT_OS_POSIX */
833*61046927SAndroid Build Coastguard Worker 
834*61046927SAndroid Build Coastguard Worker    util_cpu_caps.nr_cpus = MAX2(1, available_cpus);
835*61046927SAndroid Build Coastguard Worker    total_cpus = MAX2(total_cpus, util_cpu_caps.nr_cpus);
836*61046927SAndroid Build Coastguard Worker 
837*61046927SAndroid Build Coastguard Worker    util_cpu_caps.max_cpus = total_cpus;
838*61046927SAndroid Build Coastguard Worker    util_cpu_caps.num_cpu_mask_bits = align(total_cpus, 32);
839*61046927SAndroid Build Coastguard Worker 
840*61046927SAndroid Build Coastguard Worker    /* Make the fallback cacheline size nonzero so that it can be
841*61046927SAndroid Build Coastguard Worker     * safely passed to align().
842*61046927SAndroid Build Coastguard Worker     */
843*61046927SAndroid Build Coastguard Worker    util_cpu_caps.cacheline = sizeof(void *);
844*61046927SAndroid Build Coastguard Worker 
845*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
846*61046927SAndroid Build Coastguard Worker    if (has_cpuid()) {
847*61046927SAndroid Build Coastguard Worker       uint32_t regs[4];
848*61046927SAndroid Build Coastguard Worker       uint32_t regs2[4];
849*61046927SAndroid Build Coastguard Worker 
850*61046927SAndroid Build Coastguard Worker       util_cpu_caps.cacheline = 32;
851*61046927SAndroid Build Coastguard Worker 
852*61046927SAndroid Build Coastguard Worker       /* Get max cpuid level */
853*61046927SAndroid Build Coastguard Worker       cpuid(0x00000000, regs);
854*61046927SAndroid Build Coastguard Worker 
855*61046927SAndroid Build Coastguard Worker       if (regs[0] >= 0x00000001) {
856*61046927SAndroid Build Coastguard Worker          unsigned int cacheline;
857*61046927SAndroid Build Coastguard Worker 
858*61046927SAndroid Build Coastguard Worker          cpuid (0x00000001, regs2);
859*61046927SAndroid Build Coastguard Worker 
860*61046927SAndroid Build Coastguard Worker          util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf;
861*61046927SAndroid Build Coastguard Worker          /* Add "extended family". */
862*61046927SAndroid Build Coastguard Worker          if (util_cpu_caps.x86_cpu_type == 0xf)
863*61046927SAndroid Build Coastguard Worker              util_cpu_caps.x86_cpu_type += ((regs2[0] >> 20) & 0xff);
864*61046927SAndroid Build Coastguard Worker 
865*61046927SAndroid Build Coastguard Worker          switch (util_cpu_caps.x86_cpu_type) {
866*61046927SAndroid Build Coastguard Worker          case 0x17:
867*61046927SAndroid Build Coastguard Worker             util_cpu_caps.family = CPU_AMD_ZEN1_ZEN2;
868*61046927SAndroid Build Coastguard Worker             break;
869*61046927SAndroid Build Coastguard Worker          case 0x18:
870*61046927SAndroid Build Coastguard Worker             util_cpu_caps.family = CPU_AMD_ZEN_HYGON;
871*61046927SAndroid Build Coastguard Worker             break;
872*61046927SAndroid Build Coastguard Worker          case 0x19:
873*61046927SAndroid Build Coastguard Worker             util_cpu_caps.family = CPU_AMD_ZEN3;
874*61046927SAndroid Build Coastguard Worker             break;
875*61046927SAndroid Build Coastguard Worker          default:
876*61046927SAndroid Build Coastguard Worker             if (util_cpu_caps.x86_cpu_type > 0x19)
877*61046927SAndroid Build Coastguard Worker                util_cpu_caps.family = CPU_AMD_ZEN_NEXT;
878*61046927SAndroid Build Coastguard Worker          }
879*61046927SAndroid Build Coastguard Worker 
880*61046927SAndroid Build Coastguard Worker          /* general feature flags */
881*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_mmx    = (regs2[3] >> 23) & 1; /* 0x0800000 */
882*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_sse    = (regs2[3] >> 25) & 1; /* 0x2000000 */
883*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_sse2   = (regs2[3] >> 26) & 1; /* 0x4000000 */
884*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_sse3   = (regs2[2] >>  0) & 1; /* 0x0000001 */
885*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_ssse3  = (regs2[2] >>  9) & 1; /* 0x0000200 */
886*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1;
887*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1;
888*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1;
889*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_avx    = ((regs2[2] >> 28) & 1) && // AVX
890*61046927SAndroid Build Coastguard Worker                                     ((regs2[2] >> 27) & 1) && // OSXSAVE
891*61046927SAndroid Build Coastguard Worker                                     ((xgetbv() & 6) == 6);    // XMM & YMM
892*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_f16c   = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx;
893*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_fma    = ((regs2[2] >> 12) & 1) && util_cpu_caps.has_avx;
894*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_mmx2   = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
895*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_X86_64
896*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_daz = 1;
897*61046927SAndroid Build Coastguard Worker #else
898*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_daz = util_cpu_caps.has_sse3 ||
899*61046927SAndroid Build Coastguard Worker             (util_cpu_caps.has_sse2 && sse2_has_daz());
900*61046927SAndroid Build Coastguard Worker #endif
901*61046927SAndroid Build Coastguard Worker 
902*61046927SAndroid Build Coastguard Worker          cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
903*61046927SAndroid Build Coastguard Worker          if (cacheline > 0)
904*61046927SAndroid Build Coastguard Worker             util_cpu_caps.cacheline = cacheline;
905*61046927SAndroid Build Coastguard Worker       }
906*61046927SAndroid Build Coastguard Worker       if (regs[0] >= 0x00000007) {
907*61046927SAndroid Build Coastguard Worker          uint32_t regs7[4];
908*61046927SAndroid Build Coastguard Worker          cpuid_count(0x00000007, 0x00000000, regs7);
909*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_clflushopt = (regs7[1] >> 23) & 1;
910*61046927SAndroid Build Coastguard Worker          if (util_cpu_caps.has_avx) {
911*61046927SAndroid Build Coastguard Worker             util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1;
912*61046927SAndroid Build Coastguard Worker 
913*61046927SAndroid Build Coastguard Worker             // check for avx512
914*61046927SAndroid Build Coastguard Worker             if (xgetbv() & (0x7 << 5)) { // OPMASK: upper-256 enabled by OS
915*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512f    = (regs7[1] >> 16) & 1;
916*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512dq   = (regs7[1] >> 17) & 1;
917*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512ifma = (regs7[1] >> 21) & 1;
918*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512pf   = (regs7[1] >> 26) & 1;
919*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512er   = (regs7[1] >> 27) & 1;
920*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512cd   = (regs7[1] >> 28) & 1;
921*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512bw   = (regs7[1] >> 30) & 1;
922*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512vl   = (regs7[1] >> 31) & 1;
923*61046927SAndroid Build Coastguard Worker                util_cpu_caps.has_avx512vbmi = (regs7[2] >>  1) & 1;
924*61046927SAndroid Build Coastguard Worker             }
925*61046927SAndroid Build Coastguard Worker          }
926*61046927SAndroid Build Coastguard Worker       }
927*61046927SAndroid Build Coastguard Worker 
928*61046927SAndroid Build Coastguard Worker       if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) {
929*61046927SAndroid Build Coastguard Worker          /* GenuineIntel */
930*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_intel = 1;
931*61046927SAndroid Build Coastguard Worker       }
932*61046927SAndroid Build Coastguard Worker 
933*61046927SAndroid Build Coastguard Worker       cpuid(0x80000000, regs);
934*61046927SAndroid Build Coastguard Worker 
935*61046927SAndroid Build Coastguard Worker       if (regs[0] >= 0x80000001) {
936*61046927SAndroid Build Coastguard Worker 
937*61046927SAndroid Build Coastguard Worker          cpuid(0x80000001, regs2);
938*61046927SAndroid Build Coastguard Worker 
939*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_mmx  |= (regs2[3] >> 23) & 1;
940*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1;
941*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1;
942*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1;
943*61046927SAndroid Build Coastguard Worker 
944*61046927SAndroid Build Coastguard Worker          util_cpu_caps.has_xop = util_cpu_caps.has_avx &&
945*61046927SAndroid Build Coastguard Worker                                  ((regs2[2] >> 11) & 1);
946*61046927SAndroid Build Coastguard Worker       }
947*61046927SAndroid Build Coastguard Worker 
948*61046927SAndroid Build Coastguard Worker       if (regs[0] >= 0x80000006) {
949*61046927SAndroid Build Coastguard Worker          /* should we really do this if the clflush size above worked? */
950*61046927SAndroid Build Coastguard Worker          unsigned int cacheline;
951*61046927SAndroid Build Coastguard Worker          cpuid(0x80000006, regs2);
952*61046927SAndroid Build Coastguard Worker          cacheline = regs2[2] & 0xFF;
953*61046927SAndroid Build Coastguard Worker          if (cacheline > 0)
954*61046927SAndroid Build Coastguard Worker             util_cpu_caps.cacheline = cacheline;
955*61046927SAndroid Build Coastguard Worker       }
956*61046927SAndroid Build Coastguard Worker    }
957*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_X86 || DETECT_ARCH_X86_64 */
958*61046927SAndroid Build Coastguard Worker 
959*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_ARM || DETECT_ARCH_AARCH64
960*61046927SAndroid Build Coastguard Worker    check_os_arm_support();
961*61046927SAndroid Build Coastguard Worker #endif
962*61046927SAndroid Build Coastguard Worker 
963*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_PPC
964*61046927SAndroid Build Coastguard Worker    check_os_altivec_support();
965*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_PPC */
966*61046927SAndroid Build Coastguard Worker 
967*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_MIPS64
968*61046927SAndroid Build Coastguard Worker    check_os_mips64_support();
969*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_MIPS64 */
970*61046927SAndroid Build Coastguard Worker 
971*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_LOONGARCH64
972*61046927SAndroid Build Coastguard Worker    check_os_loongarch64_support();
973*61046927SAndroid Build Coastguard Worker #endif /* DETECT_ARCH_LOONGARCH64 */
974*61046927SAndroid Build Coastguard Worker 
975*61046927SAndroid Build Coastguard Worker #if DETECT_ARCH_S390
976*61046927SAndroid Build Coastguard Worker    util_cpu_caps.family = CPU_S390X;
977*61046927SAndroid Build Coastguard Worker #endif
978*61046927SAndroid Build Coastguard Worker 
979*61046927SAndroid Build Coastguard Worker    check_cpu_caps_override();
980*61046927SAndroid Build Coastguard Worker 
981*61046927SAndroid Build Coastguard Worker    /* max_vector_bits should be checked after cpu caps override */
982*61046927SAndroid Build Coastguard Worker    check_max_vector_bits();
983*61046927SAndroid Build Coastguard Worker 
984*61046927SAndroid Build Coastguard Worker    get_cpu_topology();
985*61046927SAndroid Build Coastguard Worker 
986*61046927SAndroid Build Coastguard Worker    if (debug_get_option_dump_cpu()) {
987*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
988*61046927SAndroid Build Coastguard Worker 
989*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
990*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
991*61046927SAndroid Build Coastguard Worker 
992*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
993*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
994*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
995*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
996*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
997*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
998*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
999*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);
1000*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
1001*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2);
1002*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c);
1003*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt);
1004*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
1005*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
1006*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
1007*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
1008*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx);
1009*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon);
1010*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_msa = %u\n", util_cpu_caps.has_msa);
1011*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
1012*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_lsx = %u\n", util_cpu_caps.has_lsx);
1013*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_lasx = %u\n", util_cpu_caps.has_lasx);
1014*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f);
1015*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq);
1016*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512ifma = %u\n", util_cpu_caps.has_avx512ifma);
1017*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512pf = %u\n", util_cpu_caps.has_avx512pf);
1018*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512er = %u\n", util_cpu_caps.has_avx512er);
1019*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512cd = %u\n", util_cpu_caps.has_avx512cd);
1020*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512bw = %u\n", util_cpu_caps.has_avx512bw);
1021*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512vl = %u\n", util_cpu_caps.has_avx512vl);
1022*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_avx512vbmi = %u\n", util_cpu_caps.has_avx512vbmi);
1023*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.has_clflushopt = %u\n", util_cpu_caps.has_clflushopt);
1024*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.num_L3_caches = %u\n", util_cpu_caps.num_L3_caches);
1025*61046927SAndroid Build Coastguard Worker       printf("util_cpu_caps.num_cpu_mask_bits = %u\n", util_cpu_caps.num_cpu_mask_bits);
1026*61046927SAndroid Build Coastguard Worker    }
1027*61046927SAndroid Build Coastguard Worker    _util_cpu_caps_state.caps = util_cpu_caps;
1028*61046927SAndroid Build Coastguard Worker 
1029*61046927SAndroid Build Coastguard Worker    /* This must happen at the end as it's used to guard everything else */
1030*61046927SAndroid Build Coastguard Worker    p_atomic_set(&_util_cpu_caps_state.detect_done, 1);
1031*61046927SAndroid Build Coastguard Worker }
1032