xref: /aosp_15_r20/external/tensorflow/tensorflow/core/platform/cpu_info.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/platform/cpu_info.h"
17 
18 #include "absl/base/call_once.h"
19 #include "tensorflow/core/platform/logging.h"
20 #include "tensorflow/core/platform/platform.h"
21 #include "tensorflow/core/platform/types.h"
22 #if defined(PLATFORM_IS_X86)
23 #include <mutex>  // NOLINT
24 #endif
25 
26 // SIMD extension querying is only available on x86.
27 #ifdef PLATFORM_IS_X86
28 #ifdef PLATFORM_WINDOWS
// Visual Studio defines a builtin function for CPUID, so use that if possible.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) passes a_inp and c_inp to __cpuidex as
// the leaf and sub-leaf, then copies the four returned words into a, b, c and
// d in that order.  The expansion is a braced block, so `cpu_info` does not
// leak into the caller's scope.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  {                                        \
    int cpu_info[4] = {-1};                \
    __cpuidex(cpu_info, a_inp, c_inp);     \
    a = cpu_info[0];                       \
    b = cpu_info[1];                       \
    c = cpu_info[2];                       \
    d = cpu_info[3];                       \
  }
39 #else
// Otherwise use gcc-format assembler to implement the underlying instructions.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) executes CPUID with a_inp in the "a"
// register and c_inp tied to operand 2 (the "c" register), and writes the
// results to a, b, c and d.  RBX is copied into RDI before CPUID and swapped
// back afterwards, so the EBX result is delivered through RDI ("=D") and RBX
// holds its original value again when the statement finishes.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  asm("mov %%rbx, %%rdi\n"                 \
      "cpuid\n"                            \
      "xchg %%rdi, %%rbx\n"                \
      : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
      : "a"(a_inp), "2"(c_inp))
47 #endif
48 #endif
49 
50 namespace tensorflow {
51 namespace port {
52 namespace {
53 
54 #ifdef PLATFORM_IS_X86
// Forward declarations: CPUIDInfo::TestFeature() calls InitCPUIDInfo(), which
// is defined only after the class body.
class CPUIDInfo;
void InitCPUIDInfo();

// File-local CPUID snapshot.  Null until CPUIDInfo::Initialize() allocates it.
CPUIDInfo *cpuid = nullptr;
59 
// Returns the EAX half of the value produced by XGETBV with ECX = 0 (XCR0).
#ifdef PLATFORM_WINDOWS
// Visual Studio provides the _xgetbv intrinsic for this instruction.
int GetXCR0EAX() {
  const int xcr0_low = _xgetbv(0);
  return xcr0_low;
}
#else
int GetXCR0EAX() {
  int xcr0_low;
  int xcr0_high;  // Written by the instruction but not used by callers.
  asm("XGETBV" : "=a"(xcr0_low), "=d"(xcr0_high) : "c"(0));
  return xcr0_low;
}
#endif
70 
71 // Structure for basic CPUID info
72 class CPUIDInfo {
73  public:
CPUIDInfo()74   CPUIDInfo()
75       : have_adx_(0),
76         have_aes_(0),
77         have_amx_bf16_(0),
78         have_amx_int8_(0),
79         have_amx_tile_(0),
80         have_avx_(0),
81         have_avx2_(0),
82         have_avx512f_(0),
83         have_avx512cd_(0),
84         have_avx512er_(0),
85         have_avx512pf_(0),
86         have_avx512vl_(0),
87         have_avx512bw_(0),
88         have_avx512dq_(0),
89         have_avx512vbmi_(0),
90         have_avx512ifma_(0),
91         have_avx512_4vnniw_(0),
92         have_avx512_4fmaps_(0),
93         have_avx512_bf16_(0),
94         have_avx512_vnni_(0),
95         have_avx_vnni_(0),
96         have_bmi1_(0),
97         have_bmi2_(0),
98         have_cmov_(0),
99         have_cmpxchg16b_(0),
100         have_cmpxchg8b_(0),
101         have_f16c_(0),
102         have_fma_(0),
103         have_mmx_(0),
104         have_pclmulqdq_(0),
105         have_popcnt_(0),
106         have_prefetchw_(0),
107         have_prefetchwt1_(0),
108         have_rdrand_(0),
109         have_rdseed_(0),
110         have_smap_(0),
111         have_sse_(0),
112         have_sse2_(0),
113         have_sse3_(0),
114         have_sse4_1_(0),
115         have_sse4_2_(0),
116         have_ssse3_(0),
117         have_hypervisor_(0) {}
118 
Initialize()119   static void Initialize() {
120     // Initialize cpuid struct
121     CHECK(cpuid == nullptr) << __func__ << " ran more than once";
122     cpuid = new CPUIDInfo;
123 
124     uint32 eax, ebx, ecx, edx;
125 
126     // Get vendor string (issue CPUID with eax = 0)
127     GETCPUID(eax, ebx, ecx, edx, 0, 0);
128     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
129     cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
130     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
131 
132     // To get general information and extended features we send eax = 1 and
133     // ecx = 0 to cpuid.  The response is returned in eax, ebx, ecx and edx.
134     // (See Intel 64 and IA-32 Architectures Software Developer's Manual
135     // Volume 2A: Instruction Set Reference, A-M CPUID).
136     GETCPUID(eax, ebx, ecx, edx, 1, 0);
137 
138     cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
139     cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
140 
141     cpuid->have_aes_ = (ecx >> 25) & 0x1;
142     cpuid->have_cmov_ = (edx >> 15) & 0x1;
143     cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
144     cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
145     cpuid->have_mmx_ = (edx >> 23) & 0x1;
146     cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
147     cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
148     cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
149     cpuid->have_sse2_ = (edx >> 26) & 0x1;
150     cpuid->have_sse3_ = ecx & 0x1;
151     cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
152     cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
153     cpuid->have_sse_ = (edx >> 25) & 0x1;
154     cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
155     cpuid->have_hypervisor_ = (ecx >> 31) & 1;
156 
157     const uint64 xcr0_xmm_mask = 0x2;
158     const uint64 xcr0_ymm_mask = 0x4;
159     const uint64 xcr0_maskreg_mask = 0x20;
160     const uint64 xcr0_zmm0_15_mask = 0x40;
161     const uint64 xcr0_zmm16_31_mask = 0x80;
162 
163     const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
164     const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
165                                     xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
166 
167     const bool have_avx =
168         // Does the OS support XGETBV instruction use by applications?
169         ((ecx >> 27) & 0x1) &&
170         // Does the OS save/restore XMM and YMM state?
171         ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
172         // Is AVX supported in hardware?
173         ((ecx >> 28) & 0x1);
174 
175     const bool have_avx512 =
176         // Does the OS support XGETBV instruction use by applications?
177         ((ecx >> 27) & 0x1) &&
178         // Does the OS save/restore ZMM state?
179         ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
180 
181     cpuid->have_avx_ = have_avx;
182     cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
183     cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
184 
185     // Get standard level 7 structured extension features (issue CPUID with
186     // eax = 7 and ecx = 0), which is required to check for AVX2 support as
187     // well as other Haswell (and beyond) features.  (See Intel 64 and IA-32
188     // Architectures Software Developer's Manual Volume 2A: Instruction Set
189     // Reference, A-M CPUID).
190     GETCPUID(eax, ebx, ecx, edx, 7, 0);
191     const uint32 kMaxNumSubLeaves = eax;
192 
193     cpuid->have_adx_ = (ebx >> 19) & 0x1;
194     cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
195     cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
196     cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
197     cpuid->have_prefetchwt1_ = ecx & 0x1;
198     cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
199     cpuid->have_smap_ = (ebx >> 20) & 0x1;
200 
201     cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
202     cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
203     cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
204     cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
205     cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
206     cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
207     cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
208     cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
209     cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
210     cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
211     cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
212     cpuid->have_avx512_vnni_ = have_avx512 && ((ecx >> 11) & 0x1);
213 
214     // The latest Intel 64 and IA-32 Architectures Software Developer's Manual
215     // Volume 2A (December 2021) does not have information on AMX yet. We use
216     // the information from Xbyak in oneDNN.
217     // https://github.com/oneapi-src/oneDNN/blob/acf8d214cedfe7e24c9446bacc1f9f648c9273f8/src/cpu/x64/xbyak/xbyak_util.h#L536-L538
218     cpuid->have_amx_tile_ = (edx >> 24) & 0x1;
219     cpuid->have_amx_int8_ = (edx >> 25) & 0x1;
220     cpuid->have_amx_bf16_ = (edx >> 22) & 0x1;
221 
222     // Get more Structured Extended Feature info by issuing CPUID with
223     // sub-leaf = 1 (eax = 7, ecx = 1)
224     if (kMaxNumSubLeaves >= 1) {
225       GETCPUID(eax, ebx, ecx, edx, 7, 1);
226       cpuid->have_avx_vnni_ = (eax >> 4) & 0x1;
227       cpuid->have_avx512_bf16_ = have_avx512 && ((eax >> 5) & 0x1);
228     }
229   }
230 
TestFeature(CPUFeature feature)231   static bool TestFeature(CPUFeature feature) {
232     InitCPUIDInfo();
233     // clang-format off
234     switch (feature) {
235       case ADX:           return cpuid->have_adx_;
236       case AES:           return cpuid->have_aes_;
237       case AMX_BF16:      return cpuid->have_amx_bf16_;
238       case AMX_INT8:      return cpuid->have_amx_int8_;
239       case AMX_TILE:      return cpuid->have_amx_tile_;
240       case AVX2:          return cpuid->have_avx2_;
241       case AVX:           return cpuid->have_avx_;
242       case AVX512F:       return cpuid->have_avx512f_;
243       case AVX512CD:      return cpuid->have_avx512cd_;
244       case AVX512PF:      return cpuid->have_avx512pf_;
245       case AVX512ER:      return cpuid->have_avx512er_;
246       case AVX512VL:      return cpuid->have_avx512vl_;
247       case AVX512BW:      return cpuid->have_avx512bw_;
248       case AVX512DQ:      return cpuid->have_avx512dq_;
249       case AVX512VBMI:    return cpuid->have_avx512vbmi_;
250       case AVX512IFMA:    return cpuid->have_avx512ifma_;
251       case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
252       case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
253       case AVX512_BF16:   return cpuid->have_avx512_bf16_;
254       case AVX512_VNNI:   return cpuid->have_avx512_vnni_;
255       case AVX_VNNI:      return cpuid->have_avx_vnni_;
256       case BMI1:          return cpuid->have_bmi1_;
257       case BMI2:          return cpuid->have_bmi2_;
258       case CMOV:          return cpuid->have_cmov_;
259       case CMPXCHG16B:    return cpuid->have_cmpxchg16b_;
260       case CMPXCHG8B:     return cpuid->have_cmpxchg8b_;
261       case F16C:          return cpuid->have_f16c_;
262       case FMA:           return cpuid->have_fma_;
263       case MMX:           return cpuid->have_mmx_;
264       case PCLMULQDQ:     return cpuid->have_pclmulqdq_;
265       case POPCNT:        return cpuid->have_popcnt_;
266       case PREFETCHW:     return cpuid->have_prefetchw_;
267       case PREFETCHWT1:   return cpuid->have_prefetchwt1_;
268       case RDRAND:        return cpuid->have_rdrand_;
269       case RDSEED:        return cpuid->have_rdseed_;
270       case SMAP:          return cpuid->have_smap_;
271       case SSE2:          return cpuid->have_sse2_;
272       case SSE3:          return cpuid->have_sse3_;
273       case SSE4_1:        return cpuid->have_sse4_1_;
274       case SSE4_2:        return cpuid->have_sse4_2_;
275       case SSE:           return cpuid->have_sse_;
276       case SSSE3:         return cpuid->have_ssse3_;
277       case HYPERVISOR:    return cpuid->have_hypervisor_;
278       default:
279         break;
280     }
281     // clang-format on
282     return false;
283   }
284 
vendor_str() const285   string vendor_str() const { return vendor_str_; }
family() const286   int family() const { return family_; }
model_num()287   int model_num() { return model_num_; }
288 
289  private:
290   int have_adx_ : 1;
291   int have_aes_ : 1;
292   int have_amx_bf16_ : 1;
293   int have_amx_int8_ : 1;
294   int have_amx_tile_ : 1;
295   int have_avx_ : 1;
296   int have_avx2_ : 1;
297   int have_avx512f_ : 1;
298   int have_avx512cd_ : 1;
299   int have_avx512er_ : 1;
300   int have_avx512pf_ : 1;
301   int have_avx512vl_ : 1;
302   int have_avx512bw_ : 1;
303   int have_avx512dq_ : 1;
304   int have_avx512vbmi_ : 1;
305   int have_avx512ifma_ : 1;
306   int have_avx512_4vnniw_ : 1;
307   int have_avx512_4fmaps_ : 1;
308   int have_avx512_bf16_ : 1;
309   int have_avx512_vnni_ : 1;
310   int have_avx_vnni_ : 1;
311   int have_bmi1_ : 1;
312   int have_bmi2_ : 1;
313   int have_cmov_ : 1;
314   int have_cmpxchg16b_ : 1;
315   int have_cmpxchg8b_ : 1;
316   int have_f16c_ : 1;
317   int have_fma_ : 1;
318   int have_mmx_ : 1;
319   int have_pclmulqdq_ : 1;
320   int have_popcnt_ : 1;
321   int have_prefetchw_ : 1;
322   int have_prefetchwt1_ : 1;
323   int have_rdrand_ : 1;
324   int have_rdseed_ : 1;
325   int have_smap_ : 1;
326   int have_sse_ : 1;
327   int have_sse2_ : 1;
328   int have_sse3_ : 1;
329   int have_sse4_1_ : 1;
330   int have_sse4_2_ : 1;
331   int have_ssse3_ : 1;
332   int have_hypervisor_ : 1;
333   string vendor_str_;
334   int family_;
335   int model_num_;
336 };
337 
338 absl::once_flag cpuid_once_flag;
339 
InitCPUIDInfo()340 void InitCPUIDInfo() {
341   // This ensures that CPUIDInfo::Initialize() is called exactly
342   // once regardless of how many threads concurrently call us
343   absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
344 }
345 
346 #endif  // PLATFORM_IS_X86
347 
348 }  // namespace
349 
TestCPUFeature(CPUFeature feature)350 bool TestCPUFeature(CPUFeature feature) {
351 #ifdef PLATFORM_IS_X86
352   return CPUIDInfo::TestFeature(feature);
353 #else
354   return false;
355 #endif
356 }
357 
// Returns the vendor string recorded from CPUID on x86 builds, initializing
// the CPUID snapshot first.  Other builds return an empty string.
std::string CPUVendorIDString() {
#ifndef PLATFORM_IS_X86
  return "";
#else
  InitCPUIDInfo();
  return cpuid->vendor_str();
#endif
}
366 
// Returns the family number recorded from CPUID on x86 builds, initializing
// the CPUID snapshot first.  Other builds return 0.
int CPUFamily() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  InitCPUIDInfo();
  return cpuid->family();
#endif
}
375 
// Returns the model number recorded from CPUID on x86 builds, initializing
// the CPUID snapshot first.  Other builds return 0.
int CPUModelNum() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  InitCPUIDInfo();
  return cpuid->model_num();
#endif
}
384 
// Returns 2 ^ SMT_Mask_Width as enumerated by CPUID leaf 11, or 0 when that
// information is unavailable (leaf 11 missing or empty, first level not of
// type 1, or a non-x86 build).
//
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
// https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
// Section: Detecting Hardware Multi-threads Support and Topology
// Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures
// Other cases not supported
int CPUIDNumSMT() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  uint32 eax, ebx, ecx, edx;

  // Leaf 0 reports the highest basic leaf in EAX; leaf 11 must be in range.
  GETCPUID(eax, ebx, ecx, edx, 0, 0);
  if (eax < 11) {
    return 0;
  }

  GETCPUID(eax, ebx, ecx, edx, 11, 0);

  // Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0
  if (ebx == 0) {
    return 0;
  }

  // CPUID.(EAX=11, ECX=0):ECX[15:8] must be 1 for EAX[4:0] to be the
  // SMT_Mask_Width.
  const uint32 level_type = (ecx & 0xff00) >> 8;
  if (level_type != 1) {
    return 0;
  }

  return 1 << (eax & 0x1f);  // 2 ^ SMT_Mask_Width
#endif  // PLATFORM_IS_X86
}
407 
408 }  // namespace port
409 }  // namespace tensorflow
410