xref: /aosp_15_r20/external/mesa3d/src/util/u_cpu_detect.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2008 Dennis Smit
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
20  * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  ***************************************************************************/
26 
27 /**
28  * @file
29  * CPU feature detection.
30  *
31  * @author Dennis Smit
32  * @author Based on the work of Eric Anholt <[email protected]>
33  */
34 
35 #ifndef _UTIL_CPU_DETECT_H
36 #define _UTIL_CPU_DETECT_H
37 
38 #include <stdbool.h>
39 
40 #include "util/macros.h"
41 #include "util/u_atomic.h"
42 #include "util/u_thread.h"
43 
44 
/*
 * Upper bound on the number of CPUs this header can describe.  It fixes the
 * size of util_affinity_mask (UTIL_MAX_CPUS bits in total) and the length of
 * the util_cpu_caps_t::cpu_to_L3 table.
 */
#define UTIL_MAX_CPUS               1024  /* this should be enough */
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
/**
 * Coarse CPU family identifier stored in util_cpu_caps_t::family.
 *
 * CPU_UNKNOWN is the first enumerator and therefore has the value 0.
 */
enum cpu_family {
   CPU_UNKNOWN,

   /* AMD Zen-based families. */
   CPU_AMD_ZEN1_ZEN2,
   CPU_AMD_ZEN_HYGON,
   CPU_AMD_ZEN3,
   CPU_AMD_ZEN_NEXT,
   /* NOTE(review): presumably an end-of-range marker for the AMD entries
    * rather than a real family -- confirm against the users of this enum.
    */
   CPU_AMD_LAST,

   CPU_S390X,
};
63 
/*
 * CPU affinity bitmask: an array of UTIL_MAX_CPUS / 32 32-bit words, i.e.
 * UTIL_MAX_CPUS bits in total.
 * NOTE(review): presumably bit N corresponds to CPU index N -- the bit layout
 * is not defined in this header, confirm against the code that fills it in.
 */
typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];
65 
/**
 * Description of the host CPU: counts, family, feature flags and L3 cache
 * topology.
 *
 * Callers obtain a read-only pointer to the shared instance through
 * util_get_cpu_caps().  How the individual fields are filled in is not visible
 * in this header.
 */
struct util_cpu_caps_t {
   /**
    * Number of CPUs available to the process.
    *
    * This will be less than or equal to \c max_cpus.  This is the number of
    * CPUs that are online and available to the process.
    */
   int16_t nr_cpus;

   /**
    * Maximum number of CPUs that can be online in the system.
    *
    * This will be greater than or equal to \c nr_cpus.  This is the number of
    * CPUs installed in the system.  \c nr_cpus will be less if some CPUs are
    * offline.
    */
   int16_t max_cpus;

   /** Detected CPU family; CPU_UNKNOWN (0) when no known family applies. */
   enum cpu_family family;

   /* Feature flags */

   /* NOTE(review): encoding of this value is not defined in this header --
    * confirm against the detection code before relying on specific values.
    */
   int x86_cpu_type;
   /* NOTE(review): presumably the cache line size in bytes -- confirm. */
   unsigned cacheline;

   /* One-bit flags, each named after the vendor, instruction-set extension or
    * CPU property it reports.
    */
   unsigned has_intel:1;
   unsigned has_mmx:1;
   unsigned has_mmx2:1;
   unsigned has_sse:1;
   unsigned has_sse2:1;
   unsigned has_sse3:1;
   unsigned has_ssse3:1;
   unsigned has_sse4_1:1;
   unsigned has_sse4_2:1;
   unsigned has_popcnt:1;
   unsigned has_avx:1;
   unsigned has_avx2:1;
   unsigned has_f16c:1;
   unsigned has_fma:1;
   unsigned has_3dnow:1;
   unsigned has_3dnow_ext:1;
   unsigned has_xop:1;
   unsigned has_altivec:1;
   unsigned has_vsx:1;
   unsigned has_daz:1;
   unsigned has_neon:1;
   unsigned has_msa:1;
   unsigned has_lsx:1;
   unsigned has_lasx:1;

   /* AVX-512 sub-feature flags, one bit per extension. */
   unsigned has_avx512f:1;
   unsigned has_avx512dq:1;
   unsigned has_avx512ifma:1;
   unsigned has_avx512pf:1;
   unsigned has_avx512er:1;
   unsigned has_avx512cd:1;
   unsigned has_avx512bw:1;
   unsigned has_avx512vl:1;
   unsigned has_avx512vbmi:1;

   unsigned has_clflushopt:1;

   /* Number of L3 caches described by the topology fields below. */
   unsigned num_L3_caches;
   /* NOTE(review): presumably the number of meaningful bits in an affinity
    * mask -- confirm against the detection code.
    */
   unsigned num_cpu_mask_bits;
   /* NOTE(review): presumably the widest usable SIMD vector width, in bits --
    * confirm against the detection code.
    */
   unsigned max_vector_bits;

   /* Per-CPU table with UTIL_MAX_CPUS entries of L3 cache identifiers.
    * NOTE(review): presumably indexed by CPU number, with U_CPU_INVALID_L3
    * (0xffff) marking entries that have no L3 assigned -- confirm.
    */
   uint16_t cpu_to_L3[UTIL_MAX_CPUS];

   /* Affinity masks for each L3 cache.
    * NOTE(review): presumably an array of num_L3_caches masks; who allocates
    * and frees it is not visible in this header.
    */
   util_affinity_mask *L3_affinity_mask;
   /**
    * number of "big" CPUs in big.LITTLE configuration
    *
    * a "big" CPU is defined as anything with >= 50% the capacity of the largest CPU,
    * useful for drivers determining how many and what kinds of threads to use
    * example: 1x prime + 3x big + 4x little = 4x "big" cores
    *
    * A value of zero indicates that CPUs are homogeneous.
    */
   int16_t nr_big_cpus;
};
146 
/**
 * Backing storage for util_get_cpu_caps(): the capability data together with
 * the bookkeeping used to run detection lazily.
 */
struct _util_cpu_caps_state_t {
   /* Passed to call_once() by util_get_cpu_caps() together with
    * _util_cpu_detect_once().
    */
   once_flag once_flag;
   /**
    * Initialized to 0 and set to non-zero with an atomic after the entire
    * struct has been initialized.
    *
    * util_get_cpu_caps() reads this with p_atomic_read() and only calls
    * call_once() while it is still zero.
    */
   uint32_t detect_done;
   /* The capability data; util_get_cpu_caps() returns a pointer to it. */
   struct util_cpu_caps_t caps;
};
156 
/*
 * All-ones 16-bit value, matching the uint16_t element type of
 * util_cpu_caps_t::cpu_to_L3.
 * NOTE(review): presumably stored there to mean "no L3 cache known for this
 * CPU" -- confirm against the code that populates and reads cpu_to_L3.
 */
#define U_CPU_INVALID_L3 0xffff
158 
159 static inline ATTRIBUTE_CONST const struct util_cpu_caps_t *
util_get_cpu_caps(void)160 util_get_cpu_caps(void)
161 {
162    extern void _util_cpu_detect_once(void);
163    extern struct _util_cpu_caps_state_t _util_cpu_caps_state;
164 
165    /* On most CPU architectures, an atomic read is simply a regular memory
166     * load instruction with some extra compiler magic to prevent code
167     * re-ordering around it.  The perf impact of doing this check should be
168     * negligible in most cases.
169     *
170     * Also, even though it looks like  a bit of a lie, we've declared this
171     * function with ATTRIBUTE_CONST.  The GCC docs say:
172     *
173     *    "Calls to functions whose return value is not affected by changes to
174     *    the observable state of the program and that have no observable
175     *    effects on such state other than to return a value may lend
176     *    themselves to optimizations such as common subexpression elimination.
177     *    Declaring such functions with the const attribute allows GCC to avoid
178     *    emitting some calls in repeated invocations of the function with the
179     *    same argument values."
180     *
181     * The word "observable" is important here.  With the exception of a
182     * llvmpipe debug flag behind an environment variable and a few unit tests,
183     * all of which emulate worse CPUs, this function neither affects nor is
184     * affected by any "observable" state.  It has its own internal state for
185     * sure, but that state is such that it appears to return exactly the same
186     * value with the same internal data every time.
187     */
188    if (unlikely(!p_atomic_read(&_util_cpu_caps_state.detect_done)))
189       call_once(&_util_cpu_caps_state.once_flag, _util_cpu_detect_once);
190 
191    return &_util_cpu_caps_state.caps;
192 }
193 
194 #ifdef __cplusplus
195 }
196 #endif
197 
198 
199 #endif /* _UTIL_CPU_DETECT_H */
200