xref: /aosp_15_r20/external/cpu_features/src/impl_x86__base_implementation.inl (revision eca53ba6d2e951e174b64682eaf56a36b8204c89)
1// Copyright 2017 Google LLC
2// Copyright 2020 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//    http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// A note on x86 SIMD instructions availability
17// -----------------------------------------------------------------------------
18// A number of conditions need to be met for an application to use SIMD
19// instructions:
20// 1. The CPU itself must support the instruction.
21// - we use `CPUID` to check whether the feature is supported.
22// 2. The OS must save and restore the associated SIMD register across context
23//    switches, we check that:
24// - the CPU reports supporting hardware context switching instructions via
25//   CPUID.1:ECX.XSAVE[bit 26]
26// - the OS reports supporting hardware context switching instructions via
27//   CPUID.1:ECX.OSXSAVE[bit 27]
28// - the CPU extended control register 0 (XCR0) is set to save and restore the
29//   needed SIMD registers
30//
31// Note that if `XSAVE`/`OSXSAVE` are missing, we delegate the detection to the
32// OS via the `DetectFeaturesFromOs` function or via microarchitecture
33// heuristics.
34//
35// Encoding
36// -----------------------------------------------------------------------------
// X86Info contains fields such as vendor and brand_string that are ASCII
// encoded strings. `vendor` holds 13 characters and `brand_string` holds 49
// (both counts include the null terminator). We use CPUID.0:E[B,D,C]X to get
// `vendor` and CPUID.8000_000[4:2]:E[D,C,B,A]X to get `brand_string`.
//
41//
42// Microarchitecture
43// -----------------------------------------------------------------------------
// `GetX86Microarchitecture` first checks the vendor via `IsVendorByX86Info`,
// then uses `CPUID(family, model)` to identify that vendor's
// microarchitecture. When `family` and `model` are the same for several
// microarchitectures we do a stepping check or, in the worst case, we rely
// on parsing brand_string (see HasSecondFMA for an example). Details of
// identification by `brand_string` can be found at:
50//  https://en.wikichip.org/wiki/intel/microarchitectures/cascade_lake
51//  https://www.intel.com/content/www/us/en/processors/processor-numbers.html
52
53// CacheInfo X86
54// -----------------------------------------------------------------------------
55// We use the CacheInfo struct to store information about cache levels. The
56// maximum number of levels is hardcoded but can be increased if needed. We have
57// full support of cache identification for the following processors:
58// • Intel:
59//    ◦ modern processors:
60//        we use `ParseCacheInfo` function with `leaf_id` 0x00000004.
61//    ◦ old processors:
62//        we parse descriptors via `GetCacheLevelInfo`, see Application Note
63//        485: Intel Processor Identification and CPUID Instruction.
64// • AMD:
65//    ◦ modern processors:
66//        we use `ParseCacheInfo` function with `leaf_id` 0x8000001D.
67//    ◦ old processors:
68//        we parse cache info using Fn8000_0005_E[A,B,C,D]X and
69//        Fn8000_0006_E[A,B,C,D]X. See AMD CPUID Specification:
70//        https://www.amd.com/system/files/TechDocs/25481.pdf.
71// • Hygon:
72//    we reuse AMD cache detection implementation.
73// • Zhaoxin:
74//    we reuse Intel cache detection implementation.
75//
76// Internal structures
77// -----------------------------------------------------------------------------
78// We use internal structures such as `Leaves` and `OsPreserves` to cache the
79// result of cpuid info and support of registers, since latency of CPUID
80// instruction is around ~100 cycles, see
81// https://www.agner.org/optimize/instruction_tables.pdf. Hence, we use
82// `ReadLeaves` function for `GetX86Info`, `GetCacheInfo` and
83// `FillX86BrandString` to read leaves and hold these values to avoid redundant
84// call on the same leaf.
85
86#include <stdbool.h>
87#include <string.h>
88
89#include "copy.inl"
90#include "cpuinfo_x86.h"
91#include "equals.inl"
92#include "internal/bit_utils.h"
93#include "internal/cpuid_x86.h"
94
95#if !defined(CPU_FEATURES_ARCH_X86)
96#error "Cannot compile cpuinfo_x86 on a non x86 platform."
97#endif
98
99////////////////////////////////////////////////////////////////////////////////
100// Definitions for CpuId and GetXCR0Eax.
101////////////////////////////////////////////////////////////////////////////////
102
103#if defined(CPU_FEATURES_MOCK_CPUID_X86)
104// Implementation will be provided by test/cpuinfo_x86_test.cc.
105#elif defined(CPU_FEATURES_COMPILER_CLANG) || defined(CPU_FEATURES_COMPILER_GCC)
106
107#include <cpuid.h>
108
109Leaf GetCpuidLeaf(uint32_t leaf_id, int ecx) {
110  Leaf leaf;
111  __cpuid_count(leaf_id, ecx, leaf.eax, leaf.ebx, leaf.ecx, leaf.edx);
112  return leaf;
113}
114
// Executes XGETBV with ECX = 0 and returns the value left in EAX. The value
// left in EDX is read as an output operand but otherwise discarded.
uint32_t GetXCR0Eax(void) {
  uint32_t xcr0_low, xcr0_high;
  /* named form of xgetbv not supported on OSX, so must use byte form, see:
     https://github.com/asmjit/asmjit/issues/78
   */
  __asm(".byte 0x0F, 0x01, 0xd0" : "=a"(xcr0_low), "=d"(xcr0_high) : "c"(0));
  (void)xcr0_high;  // Listed as an output only because XGETBV writes EDX.
  return xcr0_low;
}
123
124#elif defined(CPU_FEATURES_COMPILER_MSC)
125
126#include <immintrin.h>
127#include <intrin.h>  // For __cpuidex()
128
129Leaf GetCpuidLeaf(uint32_t leaf_id, int ecx) {
130  Leaf leaf;
131  int data[4];
132  __cpuidex(data, leaf_id, ecx);
133  leaf.eax = data[0];
134  leaf.ebx = data[1];
135  leaf.ecx = data[2];
136  leaf.edx = data[3];
137  return leaf;
138}
139
// Returns the result of `_xgetbv(0)` truncated to its low 32 bits.
uint32_t GetXCR0Eax(void) {
  const uint32_t xcr0_low = (uint32_t)_xgetbv(0);
  return xcr0_low;
}
141
142#else
143#error "Unsupported compiler, x86 cpuid requires either GCC, Clang or MSVC."
144#endif
145
146static const Leaf kEmptyLeaf;
147
148static Leaf SafeCpuIdEx(uint32_t max_cpuid_leaf, uint32_t leaf_id, int ecx) {
149  if (leaf_id <= max_cpuid_leaf) {
150    return GetCpuidLeaf(leaf_id, ecx);
151  } else {
152    return kEmptyLeaf;
153  }
154}
155
// Snapshot of the CPUID leaves used by this file. It is filled once by
// `ReadLeaves` so that the parsing functions can work from the cached values
// instead of re-executing CPUID.
typedef struct {
  uint32_t max_cpuid_leaf;  // leaf_0.eax, upper bound for the basic leaf ids
  Leaf leaf_0;    // Root
  Leaf leaf_1;    // Family, Model, Stepping
  Leaf leaf_2;    // Intel cache info + features
  Leaf leaf_7;    // Features
  Leaf leaf_7_1;  // Features
  uint32_t max_cpuid_leaf_ext;  // leaf_80000000.eax, upper bound for the
                                // extended leaf ids
  Leaf leaf_80000000;  // Root for extended leaves
  Leaf leaf_80000001;  // AMD features and cache
  Leaf leaf_80000002;  // brand string
  Leaf leaf_80000003;  // brand string
  Leaf leaf_80000004;  // brand string
  Leaf leaf_80000021;  // AMD Extended Feature Identification 2
} Leaves;
171
172static Leaves ReadLeaves(void) {
173  const Leaf leaf_0 = GetCpuidLeaf(0, 0);
174  const uint32_t max_cpuid_leaf = leaf_0.eax;
175  const Leaf leaf_80000000 = GetCpuidLeaf(0x80000000, 0);
176  const uint32_t max_cpuid_leaf_ext = leaf_80000000.eax;
177  return (Leaves){
178      .max_cpuid_leaf = max_cpuid_leaf,
179      .leaf_0 = leaf_0,
180      .leaf_1 = SafeCpuIdEx(max_cpuid_leaf, 0x00000001, 0),
181      .leaf_2 = SafeCpuIdEx(max_cpuid_leaf, 0x00000002, 0),
182      .leaf_7 = SafeCpuIdEx(max_cpuid_leaf, 0x00000007, 0),
183      .leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 0x00000007, 1),
184      .max_cpuid_leaf_ext = max_cpuid_leaf_ext,
185      .leaf_80000000 = leaf_80000000,
186      .leaf_80000001 = SafeCpuIdEx(max_cpuid_leaf_ext, 0x80000001, 0),
187      .leaf_80000002 = SafeCpuIdEx(max_cpuid_leaf_ext, 0x80000002, 0),
188      .leaf_80000003 = SafeCpuIdEx(max_cpuid_leaf_ext, 0x80000003, 0),
189      .leaf_80000004 = SafeCpuIdEx(max_cpuid_leaf_ext, 0x80000004, 0),
190      .leaf_80000021 = SafeCpuIdEx(max_cpuid_leaf_ext, 0x80000021, 0),
191  };
192}
193
194////////////////////////////////////////////////////////////////////////////////
195// OS support
196////////////////////////////////////////////////////////////////////////////////
197
// Bit masks tested against the XCR0 value returned by `GetXCR0Eax` (see the
// `Has*OsXSave` helpers below). Each mask selects a single bit.
#define MASK_XMM 0x2            // bit 1
#define MASK_YMM 0x4            // bit 2
#define MASK_MASKREG 0x20       // bit 5
#define MASK_ZMM0_15 0x40       // bit 6
#define MASK_ZMM16_31 0x80      // bit 7
#define MASK_XTILECFG 0x20000   // bit 17
#define MASK_XTILEDATA 0x40000  // bit 18
205
// Returns true when every bit set in `mask` is also set in `value`.
static bool HasMask(uint32_t value, uint32_t mask) {
  const uint32_t missing_bits = mask & ~value;
  return missing_bits == 0;
}
209
210// Checks that operating system saves and restores xmm registers during context
211// switches.
212static bool HasXmmOsXSave(uint32_t xcr0_eax) {
213  return HasMask(xcr0_eax, MASK_XMM);
214}
215
216// Checks that operating system saves and restores ymm registers during context
217// switches.
218static bool HasYmmOsXSave(uint32_t xcr0_eax) {
219  return HasMask(xcr0_eax, MASK_XMM | MASK_YMM);
220}
221
222// Checks that operating system saves and restores zmm registers during context
223// switches.
224static bool HasZmmOsXSave(uint32_t xcr0_eax) {
225  return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 |
226                               MASK_ZMM16_31);
227}
228
229// Checks that operating system saves and restores AMX/TMUL state during context
230// switches.
231static bool HasTmmOsXSave(uint32_t xcr0_eax) {
232  return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 |
233                               MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA);
234}
235
236////////////////////////////////////////////////////////////////////////////////
237// Vendor
238////////////////////////////////////////////////////////////////////////////////
239
240static void SetVendor(const Leaf leaf, char* const vendor) {
241  *(uint32_t*)(vendor) = leaf.ebx;
242  *(uint32_t*)(vendor + 4) = leaf.edx;
243  *(uint32_t*)(vendor + 8) = leaf.ecx;
244  vendor[12] = '\0';
245}
246
247static int IsVendor(const Leaf leaf, const char* const name) {
248  const uint32_t ebx = *(const uint32_t*)(name);
249  const uint32_t edx = *(const uint32_t*)(name + 4);
250  const uint32_t ecx = *(const uint32_t*)(name + 8);
251  return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx;
252}
253
254static int IsVendorByX86Info(const X86Info* info, const char* const name) {
255  return equals(info->vendor, name, sizeof(info->vendor));
256}
257
258// TODO: Remove when deprecation period is over,
259void FillX86BrandString(char brand_string[49]) {
260  const Leaves leaves = ReadLeaves();
261  const Leaf packed[3] = {
262      leaves.leaf_80000002,
263      leaves.leaf_80000003,
264      leaves.leaf_80000004,
265  };
266#if __STDC_VERSION__ >= 201112L
267  _Static_assert(sizeof(packed) == 48, "Leaves must be packed");
268#endif
269  copy(brand_string, (const char*)(packed), 48);
270  brand_string[48] = '\0';
271}
272
273////////////////////////////////////////////////////////////////////////////////
274// CpuId
275////////////////////////////////////////////////////////////////////////////////
276
277static bool HasSecondFMA(const X86Info* info) {
278  // Skylake server
279  if (info->model == 0x55) {
280    // detect Xeon
281    if (info->brand_string[9] == 'X') {
282      // detect Silver or Bronze
283      if (info->brand_string[17] == 'S' || info->brand_string[17] == 'B')
284        return false;
285      // detect Gold 5_20 and below, except for Gold 53__
286      if (info->brand_string[17] == 'G' && info->brand_string[22] == '5')
287        return (
288            (info->brand_string[23] == '3') ||
289            (info->brand_string[24] == '2' && info->brand_string[25] == '2'));
290      // detect Xeon W 210x
291      if (info->brand_string[17] == 'W' && info->brand_string[21] == '0')
292        return false;
293      // detect Xeon D 2xxx
294      if (info->brand_string[17] == 'D' && info->brand_string[19] == '2' &&
295          info->brand_string[20] == '1')
296        return false;
297    }
298    return true;
299  }
300  // Cannon Lake client
301  if (info->model == 0x66) return false;
302  // Ice Lake client
303  if (info->model == 0x7d || info->model == 0x7e) return false;
304  // This is the right default...
305  return true;
306}
307
// Internal structure to hold the OS support for vector operations.
// The flags are computed once and passed around to avoid recomputing them,
// since each call to cpuid is ~100 cycles.
typedef struct {
  bool sse_registers;     // set from HasXmmOsXSave, or from the OS fallback
  bool avx_registers;     // set from HasYmmOsXSave
  bool avx512_registers;  // set from HasZmmOsXSave
  bool amx_registers;     // set from HasTmmOsXSave
} OsPreserves;
316
// These two functions have to be implemented by the OS-specific file, that is
// the file including this file.
// - `OverrideOsPreserves` is called by `ParseCpuId` right after
//   `os_preserves` has been filled from XCR0, and may modify it.
// - `DetectFeaturesFromOs` is called by `ParseCpuId` when XCR0 is not
//   available, to fill `features` from the OS instead.
static void OverrideOsPreserves(OsPreserves* os_preserves);
static void DetectFeaturesFromOs(X86Info* info, X86Features* features);
321
// Reference https://en.wikipedia.org/wiki/CPUID.
//
// Fills `info` (family, model, stepping, brand string and feature flags) from
// the cached CPUID `leaves`, and records in `os_preserves` which vector
// register sets the OS saves and restores.
//
// Vector-extension flags are only assigned when the matching `os_preserves`
// flag is true; otherwise they keep whatever value `info->features` held on
// entry. When XSAVE/OSXSAVE are not both reported, vector feature detection is
// delegated to `DetectFeaturesFromOs`.
static void ParseCpuId(const Leaves* leaves, X86Info* info,
                       OsPreserves* os_preserves) {
  const Leaf leaf_1 = leaves->leaf_1;
  const Leaf leaf_7 = leaves->leaf_7;
  const Leaf leaf_7_1 = leaves->leaf_7_1;
  const Leaf leaf_80000001 = leaves->leaf_80000001;

  // XCR0 is only read (via GetXCR0Eax) when both CPUID.1:ECX.XSAVE[bit 26]
  // and CPUID.1:ECX.OSXSAVE[bit 27] are set.
  const bool have_xsave = IsBitSet(leaf_1.ecx, 26);
  const bool have_osxsave = IsBitSet(leaf_1.ecx, 27);
  const bool have_xcr0 = have_xsave && have_osxsave;

  const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8);
  const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20);
  const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4);
  const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16);

  X86Features* const features = &info->features;

  // Fill Family, Model and Stepping.
  info->family = extended_family + family;
  info->model = (extended_model << 4) + model;
  info->stepping = ExtractBitRange(leaf_1.eax, 3, 0);

  // Fill Brand String.
  const Leaf packed[3] = {
      leaves->leaf_80000002,
      leaves->leaf_80000003,
      leaves->leaf_80000004,
  };
#if __STDC_VERSION__ >= 201112L
  _Static_assert(sizeof(packed) == 48, "Leaves must be packed");
#endif
  copy(info->brand_string, (const char*)(packed), 48);
  info->brand_string[48] = '\0';

  // Fill cpu features.
  features->fpu = IsBitSet(leaf_1.edx, 0);
  features->tsc = IsBitSet(leaf_1.edx, 4);
  features->cx8 = IsBitSet(leaf_1.edx, 8);
  features->clfsh = IsBitSet(leaf_1.edx, 19);
  features->mmx = IsBitSet(leaf_1.edx, 23);
  features->ss = IsBitSet(leaf_1.edx, 27);
  features->pclmulqdq = IsBitSet(leaf_1.ecx, 1);
  features->smx = IsBitSet(leaf_1.ecx, 6);
  features->cx16 = IsBitSet(leaf_1.ecx, 13);
  features->dca = IsBitSet(leaf_1.ecx, 18);
  features->movbe = IsBitSet(leaf_1.ecx, 22);
  features->popcnt = IsBitSet(leaf_1.ecx, 23);
  features->aes = IsBitSet(leaf_1.ecx, 25);
  features->f16c = IsBitSet(leaf_1.ecx, 29);
  features->rdrnd = IsBitSet(leaf_1.ecx, 30);
  features->sgx = IsBitSet(leaf_7.ebx, 2);
  features->bmi1 = IsBitSet(leaf_7.ebx, 3);
  features->hle = IsBitSet(leaf_7.ebx, 4);
  features->bmi2 = IsBitSet(leaf_7.ebx, 8);
  features->erms = IsBitSet(leaf_7.ebx, 9);
  features->rtm = IsBitSet(leaf_7.ebx, 11);
  features->rdseed = IsBitSet(leaf_7.ebx, 18);
  features->clflushopt = IsBitSet(leaf_7.ebx, 23);
  features->clwb = IsBitSet(leaf_7.ebx, 24);
  features->sha = IsBitSet(leaf_7.ebx, 29);
  features->gfni = IsBitSet(leaf_7.ecx, 8);
  features->vaes = IsBitSet(leaf_7.ecx, 9);
  features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10);
  features->movdiri = IsBitSet(leaf_7.ecx, 27);
  features->movdir64b = IsBitSet(leaf_7.ecx, 28);
  features->fs_rep_mov = IsBitSet(leaf_7.edx, 4);
  features->fz_rep_movsb = IsBitSet(leaf_7_1.eax, 10);
  features->fs_rep_stosb = IsBitSet(leaf_7_1.eax, 11);
  features->fs_rep_cmpsb_scasb = IsBitSet(leaf_7_1.eax, 12);
  features->adx = IsBitSet(leaf_7.ebx, 19);
  features->lzcnt = IsBitSet(leaf_80000001.ecx, 5);
  features->lam = IsBitSet(leaf_7_1.eax, 26);

  /////////////////////////////////////////////////////////////////////////////
  // The following section is devoted to Vector Extensions.
  /////////////////////////////////////////////////////////////////////////////

  // CPUs with AVX expose XCR0, which enables checking OS support for vector
  // extensions through cpuid.
  if (have_xcr0) {
    // Here we rely exclusively on cpuid for both CPU and OS support of vector
    // extensions.
    const uint32_t xcr0_eax = GetXCR0Eax();
    os_preserves->sse_registers = HasXmmOsXSave(xcr0_eax);
    os_preserves->avx_registers = HasYmmOsXSave(xcr0_eax);
    os_preserves->avx512_registers = HasZmmOsXSave(xcr0_eax);
    os_preserves->amx_registers = HasTmmOsXSave(xcr0_eax);
    // Give the OS-specific code a chance to adjust the flags before they gate
    // the feature bits below.
    OverrideOsPreserves(os_preserves);

    if (os_preserves->sse_registers) {
      features->sse = IsBitSet(leaf_1.edx, 25);
      features->sse2 = IsBitSet(leaf_1.edx, 26);
      features->sse3 = IsBitSet(leaf_1.ecx, 0);
      features->ssse3 = IsBitSet(leaf_1.ecx, 9);
      features->sse4_1 = IsBitSet(leaf_1.ecx, 19);
      features->sse4_2 = IsBitSet(leaf_1.ecx, 20);
    }
    if (os_preserves->avx_registers) {
      features->fma3 = IsBitSet(leaf_1.ecx, 12);
      features->avx = IsBitSet(leaf_1.ecx, 28);
      features->avx_vnni = IsBitSet(leaf_7_1.eax, 4);
      features->avx2 = IsBitSet(leaf_7.ebx, 5);
    }
    if (os_preserves->avx512_registers) {
      features->avx512f = IsBitSet(leaf_7.ebx, 16);
      features->avx512cd = IsBitSet(leaf_7.ebx, 28);
      features->avx512er = IsBitSet(leaf_7.ebx, 27);
      features->avx512pf = IsBitSet(leaf_7.ebx, 26);
      features->avx512bw = IsBitSet(leaf_7.ebx, 30);
      features->avx512dq = IsBitSet(leaf_7.ebx, 17);
      features->avx512vl = IsBitSet(leaf_7.ebx, 31);
      features->avx512ifma = IsBitSet(leaf_7.ebx, 21);
      features->avx512vbmi = IsBitSet(leaf_7.ecx, 1);
      features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6);
      features->avx512vnni = IsBitSet(leaf_7.ecx, 11);
      features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
      features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
      features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
      // NOTE(review): avx512_4vbmi2 and avx512_4fmaps (below) both read
      // leaf_7.edx bit 3 - confirm this is intended.
      features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);
      // HasSecondFMA reads info->model and info->brand_string, both of which
      // were filled above.
      features->avx512_second_fma = HasSecondFMA(info);
      features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
      features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
      features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
      features->avx512_fp16 = IsBitSet(leaf_7.edx, 23);
    }
    if (os_preserves->amx_registers) {
      features->amx_bf16 = IsBitSet(leaf_7.edx, 22);
      features->amx_tile = IsBitSet(leaf_7.edx, 24);
      features->amx_int8 = IsBitSet(leaf_7.edx, 25);
      features->amx_fp16 = IsBitSet(leaf_7_1.eax, 21);
    }
  } else {
    // When XCR0 is not available (Atom based or older cpus) we need to defer to
    // the OS via custom code.
    DetectFeaturesFromOs(info, features);
    // Now that we have queried the OS for SSE support, we report this back to
    // os_preserves. This is needed in case of AMD CPU's to enable testing of
    // sse4a (See ParseExtraAMDCpuId below).
    if (features->sse) os_preserves->sse_registers = true;
  }
}
465
466static void ParseExtraAMDCpuId(const Leaves* leaves, X86Info* info,
467                               OsPreserves os_preserves) {
468  const Leaf leaf_80000001 = leaves->leaf_80000001;
469  const Leaf leaf_80000021 = leaves->leaf_80000021;
470
471  X86Features* const features = &info->features;
472
473  if (os_preserves.sse_registers) {
474    features->sse4a = IsBitSet(leaf_80000001.ecx, 6);
475  }
476
477  if (os_preserves.avx_registers) {
478    features->fma4 = IsBitSet(leaf_80000001.ecx, 16);
479  }
480
481  features->uai = IsBitSet(leaf_80000021.eax, 7);
482}
483
484static const X86Info kEmptyX86Info;
485static const OsPreserves kEmptyOsPreserves;
486
487X86Info GetX86Info(void) {
488  X86Info info = kEmptyX86Info;
489  const Leaves leaves = ReadLeaves();
490  const bool is_intel =
491      IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_GENUINE_INTEL);
492  const bool is_amd =
493      IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_AUTHENTIC_AMD);
494  const bool is_hygon =
495      IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_HYGON_GENUINE);
496  const bool is_zhaoxin =
497      (IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_CENTAUR_HAULS) ||
498       IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_SHANGHAI));
499  SetVendor(leaves.leaf_0, info.vendor);
500  if (is_intel || is_amd || is_hygon || is_zhaoxin) {
501    OsPreserves os_preserves = kEmptyOsPreserves;
502    ParseCpuId(&leaves, &info, &os_preserves);
503    if (is_amd || is_hygon) {
504      ParseExtraAMDCpuId(&leaves, &info, os_preserves);
505    }
506  }
507  return info;
508}
509
510////////////////////////////////////////////////////////////////////////////////
511// Microarchitecture
512////////////////////////////////////////////////////////////////////////////////
513
// Packs `FAMILY` and `MODEL` (each truncated to its low 8 bits) into a single
// integer, family in bits 15..8 and model in bits 7..0, so that a
// (family, model) pair can be used as a `switch` / `case` label.
#define CPUID(FAMILY, MODEL) ((((FAMILY)&0xFF) << 8) | ((MODEL)&0xFF))
515
516X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
517  if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_GENUINE_INTEL)) {
518    switch (CPUID(info->family, info->model)) {
519      case CPUID(0x04, 0x01):
520      case CPUID(0x04, 0x02):
521      case CPUID(0x04, 0x03):
522      case CPUID(0x04, 0x04):
523      case CPUID(0x04, 0x05):
524      case CPUID(0x04, 0x07):
525      case CPUID(0x04, 0x08):
526      case CPUID(0x04, 0x09):
527        // https://en.wikichip.org/wiki/intel/microarchitectures/80486
528        return INTEL_80486;
529      case CPUID(0x05, 0x01):
530      case CPUID(0x05, 0x02):
531      case CPUID(0x05, 0x04):
532      case CPUID(0x05, 0x07):
533      case CPUID(0x05, 0x08):
534        // https://en.wikichip.org/wiki/intel/microarchitectures/p5
535        return INTEL_P5;
536      case CPUID(0x05, 0x09):
537      case CPUID(0x05, 0x0A):
538        // https://en.wikichip.org/wiki/intel/quark
539        return INTEL_LAKEMONT;
540      case CPUID(0x06, 0x1C):  // Intel(R) Atom(TM) CPU 230 @ 1.60GHz
541      case CPUID(0x06, 0x35):
542      case CPUID(0x06, 0x36):
543      case CPUID(0x06, 0x70):  // https://en.wikichip.org/wiki/intel/atom/230
544        // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture)
545        return INTEL_ATOM_BNL;
546      case CPUID(0x06, 0x37):
547      case CPUID(0x06, 0x4C):
548        // https://en.wikipedia.org/wiki/Silvermont
549        return INTEL_ATOM_SMT;
550      case CPUID(0x06, 0x5C):
551        // https://en.wikipedia.org/wiki/Goldmont
552        return INTEL_ATOM_GMT;
553      case CPUID(0x06, 0x7A):
554        // https://en.wikichip.org/wiki/intel/microarchitectures/goldmont_plus
555        return INTEL_ATOM_GMT_PLUS;
556      case CPUID(0x06, 0x8A):
557      case CPUID(0x06, 0x96):
558      case CPUID(0x06, 0x9C):
559        // https://en.wikichip.org/wiki/intel/microarchitectures/tremont
560        return INTEL_ATOM_TMT;
561      case CPUID(0x06, 0x0E):
562      case CPUID(0x06, 0x0F):
563      case CPUID(0x06, 0x16):
564        // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture)
565        return INTEL_CORE;
566      case CPUID(0x06, 0x17):
567      case CPUID(0x06, 0x1D):
568        // https://en.wikipedia.org/wiki/Penryn_(microarchitecture)
569        return INTEL_PNR;
570      case CPUID(0x06, 0x1A):
571      case CPUID(0x06, 0x1E):
572      case CPUID(0x06, 0x1F):
573      case CPUID(0x06, 0x2E):
574        // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture)
575        return INTEL_NHM;
576      case CPUID(0x06, 0x25):
577      case CPUID(0x06, 0x2C):
578      case CPUID(0x06, 0x2F):
579        // https://en.wikipedia.org/wiki/Westmere_(microarchitecture)
580        return INTEL_WSM;
581      case CPUID(0x06, 0x2A):
582      case CPUID(0x06, 0x2D):
583        // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings
584        return INTEL_SNB;
585      case CPUID(0x06, 0x3A):
586      case CPUID(0x06, 0x3E):
587        // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings
588        return INTEL_IVB;
589      case CPUID(0x06, 0x3C):
590      case CPUID(0x06, 0x3F):
591      case CPUID(0x06, 0x45):
592      case CPUID(0x06, 0x46):
593        // https://en.wikipedia.org/wiki/Haswell_(microarchitecture)
594        return INTEL_HSW;
595      case CPUID(0x06, 0x3D):
596      case CPUID(0x06, 0x47):
597      case CPUID(0x06, 0x4F):
598      case CPUID(0x06, 0x56):
599        // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture)
600        return INTEL_BDW;
601      case CPUID(0x06, 0x4E):
602      case CPUID(0x06, 0x5E):
603        // https://en.wikipedia.org/wiki/Skylake_(microarchitecture)
604        return INTEL_SKL;
605      case CPUID(0x06, 0x55):
606        if (info->stepping >= 6 && info->stepping <= 7) {
607          // https://en.wikipedia.org/wiki/Cascade_Lake_(microprocessor)
608          return INTEL_CCL;
609        }
610        return INTEL_SKL;
611      case CPUID(0x06, 0x66):
612        // https://en.wikipedia.org/wiki/Cannon_Lake_(microarchitecture)
613        return INTEL_CNL;
614      case CPUID(0x06, 0x7D):  // client
615      case CPUID(0x06, 0x7E):  // client
616      case CPUID(0x06, 0x9D):  // NNP-I
617      case CPUID(0x06, 0x6A):  // server
618      case CPUID(0x06, 0x6C):  // server
619        // https://en.wikipedia.org/wiki/Ice_Lake_(microprocessor)
620        return INTEL_ICL;
621      case CPUID(0x06, 0x8C):
622      case CPUID(0x06, 0x8D):
623        // https://en.wikipedia.org/wiki/Tiger_Lake_(microarchitecture)
624        return INTEL_TGL;
625      case CPUID(0x06, 0x8F):
626        // https://en.wikipedia.org/wiki/Sapphire_Rapids
627        return INTEL_SPR;
628      case CPUID(0x06, 0x8E):
629        switch (info->stepping) {
630          case 9:
631            return INTEL_KBL;  // https://en.wikipedia.org/wiki/Kaby_Lake
632          case 10:
633            return INTEL_CFL;  // https://en.wikipedia.org/wiki/Coffee_Lake
634          case 11:
635            return INTEL_WHL;  // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
636          case 12:
637            return INTEL_CML;  // https://en.wikichip.org/wiki/intel/microarchitectures/comet_lake
638          default:
639            return X86_UNKNOWN;
640        }
641      case CPUID(0x06, 0x9E):
642        if (info->stepping > 9) {
643          // https://en.wikipedia.org/wiki/Coffee_Lake
644          return INTEL_CFL;
645        } else {
646          // https://en.wikipedia.org/wiki/Kaby_Lake
647          return INTEL_KBL;
648        }
649      case CPUID(0x06, 0x97):
650      case CPUID(0x06, 0x9A):
651      case CPUID(0x06, 0xBE):
652        // https://en.wikichip.org/wiki/intel/microarchitectures/alder_lake
653        return INTEL_ADL;
654      case CPUID(0x06, 0xA5):
655      case CPUID(0x06, 0xA6):
656        // https://en.wikichip.org/wiki/intel/microarchitectures/comet_lake
657        return INTEL_CML;
658      case CPUID(0x06, 0xA7):
659        // https://en.wikichip.org/wiki/intel/microarchitectures/rocket_lake
660        return INTEL_RCL;
661      case CPUID(0x06, 0xB7):
662      case CPUID(0x06, 0xBA):
663      case CPUID(0x06, 0xBF):
664        // https://en.wikichip.org/wiki/intel/microarchitectures/raptor_lake
665        return INTEL_RPL;
666      case CPUID(0x06, 0x85):
667        // https://en.wikichip.org/wiki/intel/microarchitectures/knights_mill
668        return INTEL_KNIGHTS_M;
669      case CPUID(0x06, 0x57):
670        // https://en.wikichip.org/wiki/intel/microarchitectures/knights_landing
671        return INTEL_KNIGHTS_L;
672      case CPUID(0x0B, 0x00):
673        // https://en.wikichip.org/wiki/intel/microarchitectures/knights_ferry
674        return INTEL_KNIGHTS_F;
675      case CPUID(0x0B, 0x01):
676        // https://en.wikichip.org/wiki/intel/microarchitectures/knights_corner
677        return INTEL_KNIGHTS_C;
678      case CPUID(0x0F, 0x01):
679      case CPUID(0x0F, 0x02):
680      case CPUID(0x0F, 0x03):
681      case CPUID(0x0F, 0x04):
682      case CPUID(0x0F, 0x06):
683        // https://en.wikichip.org/wiki/intel/microarchitectures/netburst
684        return INTEL_NETBURST;
685      default:
686        return X86_UNKNOWN;
687    }
688  }
689  if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_CENTAUR_HAULS)) {
690    switch (CPUID(info->family, info->model)) {
691      case CPUID(0x06, 0x0F):
692      case CPUID(0x06, 0x19):
693        // https://en.wikichip.org/wiki/zhaoxin/microarchitectures/zhangjiang
694        return ZHAOXIN_ZHANGJIANG;
695      case CPUID(0x07, 0x1B):
696        // https://en.wikichip.org/wiki/zhaoxin/microarchitectures/wudaokou
697        return ZHAOXIN_WUDAOKOU;
698      case CPUID(0x07, 0x3B):
699        // https://en.wikichip.org/wiki/zhaoxin/microarchitectures/lujiazui
700        return ZHAOXIN_LUJIAZUI;
701      case CPUID(0x07, 0x5B):
702        return ZHAOXIN_YONGFENG;
703      default:
704        return X86_UNKNOWN;
705    }
706  }
707  if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_SHANGHAI)) {
708    switch (CPUID(info->family, info->model)) {
709      case CPUID(0x06, 0x0F):
710      case CPUID(0x06, 0x19):
711        // https://en.wikichip.org/wiki/zhaoxin/microarchitectures/zhangjiang
712        return ZHAOXIN_ZHANGJIANG;
713      case CPUID(0x07, 0x1B):
714        // https://en.wikichip.org/wiki/zhaoxin/microarchitectures/wudaokou
715        return ZHAOXIN_WUDAOKOU;
716      case CPUID(0x07, 0x3B):
717        // https://en.wikichip.org/wiki/zhaoxin/microarchitectures/lujiazui
718        return ZHAOXIN_LUJIAZUI;
719      case CPUID(0x07, 0x5B):
720        return ZHAOXIN_YONGFENG;
721      default:
722        return X86_UNKNOWN;
723    }
724  }
725  if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_AUTHENTIC_AMD)) {
726    switch (CPUID(info->family, info->model)) {
727      // https://en.wikichip.org/wiki/amd/cpuid
728      case CPUID(0xF, 0x04):
729      case CPUID(0xF, 0x05):
730      case CPUID(0xF, 0x07):
731      case CPUID(0xF, 0x08):
732      case CPUID(0xF, 0x0C):
733      case CPUID(0xF, 0x0E):
734      case CPUID(0xF, 0x0F):
735      case CPUID(0xF, 0x14):
736      case CPUID(0xF, 0x15):
737      case CPUID(0xF, 0x17):
738      case CPUID(0xF, 0x18):
739      case CPUID(0xF, 0x1B):
740      case CPUID(0xF, 0x1C):
741      case CPUID(0xF, 0x1F):
742      case CPUID(0xF, 0x21):
743      case CPUID(0xF, 0x23):
744      case CPUID(0xF, 0x24):
745      case CPUID(0xF, 0x25):
746      case CPUID(0xF, 0x27):
747      case CPUID(0xF, 0x2B):
748      case CPUID(0xF, 0x2C):
749      case CPUID(0xF, 0x2F):
750      case CPUID(0xF, 0x41):
751      case CPUID(0xF, 0x43):
752      case CPUID(0xF, 0x48):
753      case CPUID(0xF, 0x4B):
754      case CPUID(0xF, 0x4C):
755      case CPUID(0xF, 0x4F):
756      case CPUID(0xF, 0x5D):
757      case CPUID(0xF, 0x5F):
758      case CPUID(0xF, 0x68):
759      case CPUID(0xF, 0x6B):
760      case CPUID(0xF, 0x6F):
761      case CPUID(0xF, 0x7F):
762      case CPUID(0xF, 0xC1):
763        return AMD_HAMMER;
764      case CPUID(0x10, 0x02):
765      case CPUID(0x10, 0x04):
766      case CPUID(0x10, 0x05):
767      case CPUID(0x10, 0x06):
768      case CPUID(0x10, 0x08):
769      case CPUID(0x10, 0x09):
770      case CPUID(0x10, 0x0A):
771        return AMD_K10;
772      case CPUID(0x11, 0x03):
773        // http://developer.amd.com/wordpress/media/2012/10/41788.pdf
774        return AMD_K11;
775      case CPUID(0x12, 0x00):
776      case CPUID(0x12, 0x01):
777        // https://www.amd.com/system/files/TechDocs/44739_12h_Rev_Gd.pdf
778        return AMD_K12;
779      case CPUID(0x14, 0x00):
780      case CPUID(0x14, 0x01):
781      case CPUID(0x14, 0x02):
782        // https://www.amd.com/system/files/TechDocs/47534_14h_Mod_00h-0Fh_Rev_Guide.pdf
783        return AMD_BOBCAT;
784      case CPUID(0x15, 0x01):
785        // https://en.wikichip.org/wiki/amd/microarchitectures/bulldozer
786        return AMD_BULLDOZER;
787      case CPUID(0x15, 0x02):
788      case CPUID(0x15, 0x10):
789      case CPUID(0x15, 0x11):
790      case CPUID(0x15, 0x13):
791        // https://en.wikichip.org/wiki/amd/microarchitectures/piledriver
792        // https://www.amd.com/system/files/TechDocs/48931_15h_Mod_10h-1Fh_Rev_Guide.pdf
793        return AMD_PILEDRIVER;
794      case CPUID(0x15, 0x30):
795      case CPUID(0x15, 0x38):
796        // https://en.wikichip.org/wiki/amd/microarchitectures/steamroller
797        return AMD_STREAMROLLER;
798      case CPUID(0x15, 0x60):
799      case CPUID(0x15, 0x65):
800      case CPUID(0x15, 0x70):
801        // https://en.wikichip.org/wiki/amd/microarchitectures/excavator
802        return AMD_EXCAVATOR;
803      case CPUID(0x16, 0x00):
804      case CPUID(0x16, 0x26):
805        return AMD_JAGUAR;
806      case CPUID(0x16, 0x30):
807        return AMD_PUMA;
808      case CPUID(0x17, 0x01):
809      case CPUID(0x17, 0x11):
810      case CPUID(0x17, 0x18):
811      case CPUID(0x17, 0x20):
812        // https://en.wikichip.org/wiki/amd/microarchitectures/zen
813        return AMD_ZEN;
814      case CPUID(0x17, 0x08):
815        // https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B
816        return AMD_ZEN_PLUS;
817      case CPUID(0x17, 0x31):
818      case CPUID(0x17, 0x47):
819      case CPUID(0x17, 0x60):
820      case CPUID(0x17, 0x68):
821      case CPUID(0x17, 0x71):
822      case CPUID(0x17, 0x84):
823      case CPUID(0x17, 0x90):
824      case CPUID(0x17, 0x98):
825      case CPUID(0x17, 0xA0):
826        // https://en.wikichip.org/wiki/amd/microarchitectures/zen_2
827        return AMD_ZEN2;
828      case CPUID(0x19, 0x00):
829      case CPUID(0x19, 0x01):
830      case CPUID(0x19, 0x08):
831      case CPUID(0x19, 0x21):
832      case CPUID(0x19, 0x30):
833      case CPUID(0x19, 0x40):
834      case CPUID(0x19, 0x44):
835      case CPUID(0x19, 0x50):
836        // https://en.wikichip.org/wiki/amd/microarchitectures/zen_3
837        return AMD_ZEN3;
838      case CPUID(0x19, 0x10):
839      case CPUID(0x19, 0x11):
840      case CPUID(0x19, 0x61):
841      case CPUID(0x19, 0x74):
842        // https://en.wikichip.org/wiki/amd/microarchitectures/zen_4
843        return AMD_ZEN4;
844      default:
845        return X86_UNKNOWN;
846    }
847  }
848  if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_HYGON_GENUINE)) {
849    switch (CPUID(info->family, info->model)) {
850      case CPUID(0x18, 0x00):
851      case CPUID(0x18, 0x01):
852        return AMD_ZEN;
853    }
854  }
855  return X86_UNKNOWN;
856}
857
858////////////////////////////////////////////////////////////////////////////////
859// CacheInfo
860////////////////////////////////////////////////////////////////////////////////
861
// All-zero CacheLevelInfo constant. It is declared at file scope with `static`
// and no initializer, so it has static storage duration and C zero-initializes
// every field.
// NOTE(review): no use of this constant is visible in this part of the file;
// presumably it is the fallback value for cache descriptors that
// GetCacheLevelInfo does not recognize -- confirm against the callers.
862static const CacheLevelInfo kEmptyCacheLevelInfo;
863
864static CacheLevelInfo GetCacheLevelInfo(const uint32_t reg) {
865  const int UNDEF = -1;
866  const int KiB = 1024;
867  const int MiB = 1024 * KiB;
868  switch (reg) {
869    case 0x01:
870      return (CacheLevelInfo){.level = UNDEF,
871                              .cache_type = CPU_FEATURE_CACHE_TLB,
872                              .cache_size = 4 * KiB,
873                              .ways = 4,
874                              .line_size = UNDEF,
875                              .tlb_entries = 32,
876                              .partitioning = 0};
877    case 0x02:
878      return (CacheLevelInfo){.level = UNDEF,
879                              .cache_type = CPU_FEATURE_CACHE_TLB,
880                              .cache_size = 4 * MiB,
881                              .ways = 0xFF,
882                              .line_size = UNDEF,
883                              .tlb_entries = 2,
884                              .partitioning = 0};
885    case 0x03:
886      return (CacheLevelInfo){.level = UNDEF,
887                              .cache_type = CPU_FEATURE_CACHE_TLB,
888                              .cache_size = 4 * KiB,
889                              .ways = 4,
890                              .line_size = UNDEF,
891                              .tlb_entries = 64,
892                              .partitioning = 0};
893    case 0x04:
894      return (CacheLevelInfo){.level = UNDEF,
895                              .cache_type = CPU_FEATURE_CACHE_TLB,
896                              .cache_size = 4 * MiB,
897                              .ways = 4,
898                              .line_size = UNDEF,
899                              .tlb_entries = 8,
900                              .partitioning = 0};
901    case 0x05:
902      return (CacheLevelInfo){.level = UNDEF,
903                              .cache_type = CPU_FEATURE_CACHE_TLB,
904                              .cache_size = 4 * MiB,
905                              .ways = 4,
906                              .line_size = UNDEF,
907                              .tlb_entries = 32,
908                              .partitioning = 0};
909    case 0x06:
910      return (CacheLevelInfo){.level = 1,
911                              .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
912                              .cache_size = 8 * KiB,
913                              .ways = 4,
914                              .line_size = 32,
915                              .tlb_entries = UNDEF,
916                              .partitioning = 0};
917    case 0x08:
918      return (CacheLevelInfo){.level = 1,
919                              .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
920                              .cache_size = 16 * KiB,
921                              .ways = 4,
922                              .line_size = 32,
923                              .tlb_entries = UNDEF,
924                              .partitioning = 0};
925    case 0x09:
926      return (CacheLevelInfo){.level = 1,
927                              .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
928                              .cache_size = 32 * KiB,
929                              .ways = 4,
930                              .line_size = 64,
931                              .tlb_entries = UNDEF,
932                              .partitioning = 0};
933    case 0x0A:
934      return (CacheLevelInfo){.level = 1,
935                              .cache_type = CPU_FEATURE_CACHE_DATA,
936                              .cache_size = 8 * KiB,
937                              .ways = 2,
938                              .line_size = 32,
939                              .tlb_entries = UNDEF,
940                              .partitioning = 0};
941    case 0x0B:
942      return (CacheLevelInfo){.level = UNDEF,
943                              .cache_type = CPU_FEATURE_CACHE_TLB,
944                              .cache_size = 4 * MiB,
945                              .ways = 4,
946                              .line_size = UNDEF,
947                              .tlb_entries = 4,
948                              .partitioning = 0};
949    case 0x0C:
950      return (CacheLevelInfo){.level = 1,
951                              .cache_type = CPU_FEATURE_CACHE_DATA,
952                              .cache_size = 16 * KiB,
953                              .ways = 4,
954                              .line_size = 32,
955                              .tlb_entries = UNDEF,
956                              .partitioning = 0};
957    case 0x0D:
958      return (CacheLevelInfo){.level = 1,
959                              .cache_type = CPU_FEATURE_CACHE_DATA,
960                              .cache_size = 16 * KiB,
961                              .ways = 4,
962                              .line_size = 64,
963                              .tlb_entries = UNDEF,
964                              .partitioning = 0};
965    case 0x0E:
966      return (CacheLevelInfo){.level = 1,
967                              .cache_type = CPU_FEATURE_CACHE_DATA,
968                              .cache_size = 24 * KiB,
969                              .ways = 6,
970                              .line_size = 64,
971                              .tlb_entries = UNDEF,
972                              .partitioning = 0};
973    case 0x1D:
974      return (CacheLevelInfo){.level = 2,
975                              .cache_type = CPU_FEATURE_CACHE_DATA,
976                              .cache_size = 128 * KiB,
977                              .ways = 2,
978                              .line_size = 64,
979                              .tlb_entries = UNDEF,
980                              .partitioning = 0};
981    case 0x21:
982      return (CacheLevelInfo){.level = 2,
983                              .cache_type = CPU_FEATURE_CACHE_DATA,
984                              .cache_size = 256 * KiB,
985                              .ways = 8,
986                              .line_size = 64,
987                              .tlb_entries = UNDEF,
988                              .partitioning = 0};
989    case 0x22:
990      return (CacheLevelInfo){.level = 3,
991                              .cache_type = CPU_FEATURE_CACHE_DATA,
992                              .cache_size = 512 * KiB,
993                              .ways = 4,
994                              .line_size = 64,
995                              .tlb_entries = UNDEF,
996                              .partitioning = 2};
997    case 0x23:
998      return (CacheLevelInfo){.level = 3,
999                              .cache_type = CPU_FEATURE_CACHE_DATA,
1000                              .cache_size = 1 * MiB,
1001                              .ways = 8,
1002                              .line_size = 64,
1003                              .tlb_entries = UNDEF,
1004                              .partitioning = 2};
1005    case 0x24:
1006      return (CacheLevelInfo){.level = 2,
1007                              .cache_type = CPU_FEATURE_CACHE_DATA,
1008                              .cache_size = 1 * MiB,
1009                              .ways = 16,
1010                              .line_size = 64,
1011                              .tlb_entries = UNDEF,
1012                              .partitioning = 0};
1013    case 0x25:
1014      return (CacheLevelInfo){.level = 3,
1015                              .cache_type = CPU_FEATURE_CACHE_DATA,
1016                              .cache_size = 2 * MiB,
1017                              .ways = 8,
1018                              .line_size = 64,
1019                              .tlb_entries = UNDEF,
1020                              .partitioning = 2};
1021    case 0x29:
1022      return (CacheLevelInfo){.level = 3,
1023                              .cache_type = CPU_FEATURE_CACHE_DATA,
1024                              .cache_size = 4 * MiB,
1025                              .ways = 8,
1026                              .line_size = 64,
1027                              .tlb_entries = UNDEF,
1028                              .partitioning = 2};
1029    case 0x2C:
1030      return (CacheLevelInfo){.level = 1,
1031                              .cache_type = CPU_FEATURE_CACHE_DATA,
1032                              .cache_size = 32 * KiB,
1033                              .ways = 8,
1034                              .line_size = 64,
1035                              .tlb_entries = UNDEF,
1036                              .partitioning = 0};
1037    case 0x30:
1038      return (CacheLevelInfo){.level = 1,
1039                              .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
1040                              .cache_size = 32 * KiB,
1041                              .ways = 8,
1042                              .line_size = 64,
1043                              .tlb_entries = UNDEF,
1044                              .partitioning = 0};
1045    case 0x40:
1046      return (CacheLevelInfo){.level = UNDEF,
1047                              .cache_type = CPU_FEATURE_CACHE_DATA,
1048                              .cache_size = UNDEF,
1049                              .ways = UNDEF,
1050                              .line_size = UNDEF,
1051                              .tlb_entries = UNDEF,
1052                              .partitioning = 0};
1053    case 0x41:
1054      return (CacheLevelInfo){.level = 2,
1055                              .cache_type = CPU_FEATURE_CACHE_DATA,
1056                              .cache_size = 128 * KiB,
1057                              .ways = 4,
1058                              .line_size = 32,
1059                              .tlb_entries = UNDEF,
1060                              .partitioning = 0};
1061    case 0x42:
1062      return (CacheLevelInfo){.level = 2,
1063                              .cache_type = CPU_FEATURE_CACHE_DATA,
1064                              .cache_size = 256 * KiB,
1065                              .ways = 4,
1066                              .line_size = 32,
1067                              .tlb_entries = UNDEF,
1068                              .partitioning = 0};
1069    case 0x43:
1070      return (CacheLevelInfo){.level = 2,
1071                              .cache_type = CPU_FEATURE_CACHE_DATA,
1072                              .cache_size = 512 * KiB,
1073                              .ways = 4,
1074                              .line_size = 32,
1075                              .tlb_entries = UNDEF,
1076                              .partitioning = 0};
1077    case 0x44:
1078      return (CacheLevelInfo){.level = 2,
1079                              .cache_type = CPU_FEATURE_CACHE_DATA,
1080                              .cache_size = 1 * MiB,
1081                              .ways = 4,
1082                              .line_size = 32,
1083                              .tlb_entries = UNDEF,
1084                              .partitioning = 0};
1085    case 0x45:
1086      return (CacheLevelInfo){.level = 2,
1087                              .cache_type = CPU_FEATURE_CACHE_DATA,
1088                              .cache_size = 2 * MiB,
1089                              .ways = 4,
1090                              .line_size = 32,
1091                              .tlb_entries = UNDEF,
1092                              .partitioning = 0};
1093    case 0x46:
1094      return (CacheLevelInfo){.level = 3,
1095                              .cache_type = CPU_FEATURE_CACHE_DATA,
1096                              .cache_size = 4 * MiB,
1097                              .ways = 4,
1098                              .line_size = 64,
1099                              .tlb_entries = UNDEF,
1100                              .partitioning = 0};
1101    case 0x47:
1102      return (CacheLevelInfo){.level = 3,
1103                              .cache_type = CPU_FEATURE_CACHE_DATA,
1104                              .cache_size = 8 * MiB,
1105                              .ways = 8,
1106                              .line_size = 64,
1107                              .tlb_entries = UNDEF,
1108                              .partitioning = 0};
1109    case 0x48:
1110      return (CacheLevelInfo){.level = 2,
1111                              .cache_type = CPU_FEATURE_CACHE_DATA,
1112                              .cache_size = 3 * MiB,
1113                              .ways = 12,
1114                              .line_size = 64,
1115                              .tlb_entries = UNDEF,
1116                              .partitioning = 0};
1117    case 0x49:
1118      return (CacheLevelInfo){.level = 2,
1119                              .cache_type = CPU_FEATURE_CACHE_DATA,
1120                              .cache_size = 4 * MiB,
1121                              .ways = 16,
1122                              .line_size = 64,
1123                              .tlb_entries = UNDEF,
1124                              .partitioning = 0};
1125    case (0x49 | (1 << 8)):
1126      return (CacheLevelInfo){.level = 3,
1127                              .cache_type = CPU_FEATURE_CACHE_DATA,
1128                              .cache_size = 4 * MiB,
1129                              .ways = 16,
1130                              .line_size = 64,
1131                              .tlb_entries = UNDEF,
1132                              .partitioning = 0};
1133    case 0x4A:
1134      return (CacheLevelInfo){.level = 3,
1135                              .cache_type = CPU_FEATURE_CACHE_DATA,
1136                              .cache_size = 6 * MiB,
1137                              .ways = 12,
1138                              .line_size = 64,
1139                              .tlb_entries = UNDEF,
1140                              .partitioning = 0};
1141    case 0x4B:
1142      return (CacheLevelInfo){.level = 3,
1143                              .cache_type = CPU_FEATURE_CACHE_DATA,
1144                              .cache_size = 8 * MiB,
1145                              .ways = 16,
1146                              .line_size = 64,
1147                              .tlb_entries = UNDEF,
1148                              .partitioning = 0};
1149    case 0x4C:
1150      return (CacheLevelInfo){.level = 3,
1151                              .cache_type = CPU_FEATURE_CACHE_DATA,
1152                              .cache_size = 12 * MiB,
1153                              .ways = 12,
1154                              .line_size = 64,
1155                              .tlb_entries = UNDEF,
1156                              .partitioning = 0};
1157    case 0x4D:
1158      return (CacheLevelInfo){.level = 3,
1159                              .cache_type = CPU_FEATURE_CACHE_DATA,
1160                              .cache_size = 16 * MiB,
1161                              .ways = 16,
1162                              .line_size = 64,
1163                              .tlb_entries = UNDEF,
1164                              .partitioning = 0};
1165    case 0x4E:
1166      return (CacheLevelInfo){.level = 2,
1167                              .cache_type = CPU_FEATURE_CACHE_DATA,
1168                              .cache_size = 6 * MiB,
1169                              .ways = 24,
1170                              .line_size = 64,
1171                              .tlb_entries = UNDEF,
1172                              .partitioning = 0};
1173    case 0x4F:
1174      return (CacheLevelInfo){.level = UNDEF,
1175                              .cache_type = CPU_FEATURE_CACHE_TLB,
1176                              .cache_size = 4 * KiB,
1177                              .ways = UNDEF,
1178                              .line_size = UNDEF,
1179                              .tlb_entries = 32,
1180                              .partitioning = 0};
1181    case 0x50:
1182      return (CacheLevelInfo){.level = UNDEF,
1183                              .cache_type = CPU_FEATURE_CACHE_TLB,
1184                              .cache_size = 4 * KiB,
1185                              .ways = UNDEF,
1186                              .line_size = UNDEF,
1187                              .tlb_entries = 64,
1188                              .partitioning = 0};
1189    case 0x51:
1190      return (CacheLevelInfo){.level = UNDEF,
1191                              .cache_type = CPU_FEATURE_CACHE_TLB,
1192                              .cache_size = 4 * KiB,
1193                              .ways = UNDEF,
1194                              .line_size = UNDEF,
1195                              .tlb_entries = 128,
1196                              .partitioning = 0};
1197    case 0x52:
1198      return (CacheLevelInfo){.level = UNDEF,
1199                              .cache_type = CPU_FEATURE_CACHE_TLB,
1200                              .cache_size = 4 * KiB,
1201                              .ways = UNDEF,
1202                              .line_size = UNDEF,
1203                              .tlb_entries = 256,
1204                              .partitioning = 0};
1205    case 0x55:
1206      return (CacheLevelInfo){.level = UNDEF,
1207                              .cache_type = CPU_FEATURE_CACHE_TLB,
1208                              .cache_size = 2 * MiB,
1209                              .ways = 0xFF,
1210                              .line_size = UNDEF,
1211                              .tlb_entries = 7,
1212                              .partitioning = 0};
1213    case 0x56:
1214      return (CacheLevelInfo){.level = UNDEF,
1215                              .cache_type = CPU_FEATURE_CACHE_TLB,
1216                              .cache_size = 4 * MiB,
1217                              .ways = 4,
1218                              .line_size = UNDEF,
1219                              .tlb_entries = 16,
1220                              .partitioning = 0};
1221    case 0x57:
1222      return (CacheLevelInfo){.level = UNDEF,
1223                              .cache_type = CPU_FEATURE_CACHE_TLB,
1224                              .cache_size = 4 * KiB,
1225                              .ways = 4,
1226                              .line_size = UNDEF,
1227                              .tlb_entries = 16,
1228                              .partitioning = 0};
1229    case 0x59:
1230      return (CacheLevelInfo){.level = UNDEF,
1231                              .cache_type = CPU_FEATURE_CACHE_TLB,
1232                              .cache_size = 4 * KiB,
1233                              .ways = 0xFF,
1234                              .line_size = UNDEF,
1235                              .tlb_entries = 16,
1236                              .partitioning = 0};
1237    case 0x5A:
1238      return (CacheLevelInfo){.level = UNDEF,
1239                              .cache_type = CPU_FEATURE_CACHE_TLB,
1240                              .cache_size = 2 * MiB,
1241                              .ways = 4,
1242                              .line_size = UNDEF,
1243                              .tlb_entries = 32,
1244                              .partitioning = 0};
1245    case 0x5B:
1246      return (CacheLevelInfo){.level = UNDEF,
1247                              .cache_type = CPU_FEATURE_CACHE_TLB,
1248                              .cache_size = 4 * KiB,
1249                              .ways = UNDEF,
1250                              .line_size = UNDEF,
1251                              .tlb_entries = 64,
1252                              .partitioning = 0};
1253    case 0x5C:
1254      return (CacheLevelInfo){.level = UNDEF,
1255                              .cache_type = CPU_FEATURE_CACHE_TLB,
1256                              .cache_size = 4 * KiB,
1257                              .ways = UNDEF,
1258                              .line_size = UNDEF,
1259                              .tlb_entries = 128,
1260                              .partitioning = 0};
1261    case 0x5D:
1262      return (CacheLevelInfo){.level = UNDEF,
1263                              .cache_type = CPU_FEATURE_CACHE_TLB,
1264                              .cache_size = 4,
1265                              .ways = UNDEF,
1266                              .line_size = UNDEF,
1267                              .tlb_entries = 256,
1268                              .partitioning = 0};
1269    case 0x60:
1270      return (CacheLevelInfo){.level = 1,
1271                              .cache_type = CPU_FEATURE_CACHE_DATA,
1272                              .cache_size = 16 * KiB,
1273                              .ways = 8,
1274                              .line_size = 64,
1275                              .tlb_entries = UNDEF,
1276                              .partitioning = 0};
1277    case 0x61:
1278      return (CacheLevelInfo){.level = UNDEF,
1279                              .cache_type = CPU_FEATURE_CACHE_TLB,
1280                              .cache_size = 4 * KiB,
1281                              .ways = 0xFF,
1282                              .line_size = UNDEF,
1283                              .tlb_entries = 48,
1284                              .partitioning = 0};
1285    case 0x63:
1286      return (CacheLevelInfo){.level = UNDEF,
1287                              .cache_type = CPU_FEATURE_CACHE_TLB,
1288                              .cache_size = 2 * MiB,
1289                              .ways = 4,
1290                              .line_size = UNDEF,
1291                              .tlb_entries = 4,
1292                              .partitioning = 0};
1293    case 0x66:
1294      return (CacheLevelInfo){.level = 1,
1295                              .cache_type = CPU_FEATURE_CACHE_DATA,
1296                              .cache_size = 8 * KiB,
1297                              .ways = 4,
1298                              .line_size = 64,
1299                              .tlb_entries = UNDEF,
1300                              .partitioning = 0};
1301    case 0x67:
1302      return (CacheLevelInfo){.level = 1,
1303                              .cache_type = CPU_FEATURE_CACHE_DATA,
1304                              .cache_size = 16 * KiB,
1305                              .ways = 4,
1306                              .line_size = 64,
1307                              .tlb_entries = UNDEF,
1308                              .partitioning = 0};
1309    case 0x68:
1310      return (CacheLevelInfo){.level = 1,
1311                              .cache_type = CPU_FEATURE_CACHE_DATA,
1312                              .cache_size = 32 * KiB,
1313                              .ways = 4,
1314                              .line_size = 64,
1315                              .tlb_entries = UNDEF,
1316                              .partitioning = 0};
1317    case 0x70:
1318      return (CacheLevelInfo){.level = 1,
1319                              .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
1320                              .cache_size = 12 * KiB,
1321                              .ways = 8,
1322                              .line_size = UNDEF,
1323                              .tlb_entries = UNDEF,
1324                              .partitioning = 0};
1325    case 0x71:
1326      return (CacheLevelInfo){.level = 1,
1327                              .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
1328                              .cache_size = 16 * KiB,
1329                              .ways = 8,
1330                              .line_size = UNDEF,
1331                              .tlb_entries = UNDEF,
1332                              .partitioning = 0};
1333    case 0x72:
1334      return (CacheLevelInfo){.level = 1,
1335                              .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
1336                              .cache_size = 32 * KiB,
1337                              .ways = 8,
1338                              .line_size = UNDEF,
1339                              .tlb_entries = UNDEF,
1340                              .partitioning = 0};
1341    case 0x76:
1342      return (CacheLevelInfo){.level = UNDEF,
1343                              .cache_type = CPU_FEATURE_CACHE_TLB,
1344                              .cache_size = 2 * MiB,
1345                              .ways = 0xFF,
1346                              .line_size = UNDEF,
1347                              .tlb_entries = 8,
1348                              .partitioning = 0};
1349    case 0x78:
1350      return (CacheLevelInfo){.level = 2,
1351                              .cache_type = CPU_FEATURE_CACHE_DATA,
1352                              .cache_size = 1 * MiB,
1353                              .ways = 4,
1354                              .line_size = 64,
1355                              .tlb_entries = UNDEF,
1356                              .partitioning = 0};
1357    case 0x79:
1358      return (CacheLevelInfo){.level = 2,
1359                              .cache_type = CPU_FEATURE_CACHE_DATA,
1360                              .cache_size = 128 * KiB,
1361                              .ways = 8,
1362                              .line_size = 64,
1363                              .tlb_entries = UNDEF,
1364                              .partitioning = 2};
1365    case 0x7A:
1366      return (CacheLevelInfo){.level = 2,
1367                              .cache_type = CPU_FEATURE_CACHE_DATA,
1368                              .cache_size = 256 * KiB,
1369                              .ways = 8,
1370                              .line_size = 64,
1371                              .tlb_entries = UNDEF,
1372                              .partitioning = 2};
1373    case 0x7B:
1374      return (CacheLevelInfo){.level = 2,
1375                              .cache_type = CPU_FEATURE_CACHE_DATA,
1376                              .cache_size = 512 * KiB,
1377                              .ways = 8,
1378                              .line_size = 64,
1379                              .tlb_entries = UNDEF,
1380                              .partitioning = 2};
1381    case 0x7C:
1382      return (CacheLevelInfo){.level = 2,
1383                              .cache_type = CPU_FEATURE_CACHE_DATA,
1384                              .cache_size = 1 * MiB,
1385                              .ways = 8,
1386                              .line_size = 64,
1387                              .tlb_entries = UNDEF,
1388                              .partitioning = 2};
1389    case 0x7D:
1390      return (CacheLevelInfo){.level = 2,
1391                              .cache_type = CPU_FEATURE_CACHE_DATA,
1392                              .cache_size = 2 * MiB,
1393                              .ways = 8,
1394                              .line_size = 64,
1395                              .tlb_entries = UNDEF,
1396                              .partitioning = 0};
1397    case 0x7F:
1398      return (CacheLevelInfo){.level = 2,
1399                              .cache_type = CPU_FEATURE_CACHE_DATA,
1400                              .cache_size = 512 * KiB,
1401                              .ways = 2,
1402                              .line_size = 64,
1403                              .tlb_entries = UNDEF,
1404                              .partitioning = 0};
1405    case 0x80:
1406      return (CacheLevelInfo){.level = 2,
1407                              .cache_type = CPU_FEATURE_CACHE_DATA,
1408                              .cache_size = 512 * KiB,
1409                              .ways = 8,
1410                              .line_size = 64,
1411                              .tlb_entries = UNDEF,
1412                              .partitioning = 0};
1413    case 0x82:
1414      return (CacheLevelInfo){.level = 2,
1415                              .cache_type = CPU_FEATURE_CACHE_DATA,
1416                              .cache_size = 256 * KiB,
1417                              .ways = 8,
1418                              .line_size = 32,
1419                              .tlb_entries = UNDEF,
1420                              .partitioning = 0};
1421    case 0x83:
1422      return (CacheLevelInfo){.level = 2,
1423                              .cache_type = CPU_FEATURE_CACHE_DATA,
1424                              .cache_size = 512 * KiB,
1425                              .ways = 8,
1426                              .line_size = 32,
1427                              .tlb_entries = UNDEF,
1428                              .partitioning = 0};
1429    case 0x84:
1430      return (CacheLevelInfo){.level = 2,
1431                              .cache_type = CPU_FEATURE_CACHE_DATA,
1432                              .cache_size = 1 * MiB,
1433                              .ways = 8,
1434                              .line_size = 32,
1435                              .tlb_entries = UNDEF,
1436                              .partitioning = 0};
1437    case 0x85:
1438      return (CacheLevelInfo){.level = 2,
1439                              .cache_type = CPU_FEATURE_CACHE_DATA,
1440                              .cache_size = 2 * MiB,
1441                              .ways = 8,
1442                              .line_size = 32,
1443                              .tlb_entries = UNDEF,
1444                              .partitioning = 0};
1445    case 0x86:
1446      return (CacheLevelInfo){.level = 2,
1447                              .cache_type = CPU_FEATURE_CACHE_DATA,
1448                              .cache_size = 512 * KiB,
1449                              .ways = 4,
1450                              .line_size = 32,
1451                              .tlb_entries = UNDEF,
1452                              .partitioning = 0};
1453    case 0x87:
1454      return (CacheLevelInfo){.level = 2,
1455                              .cache_type = CPU_FEATURE_CACHE_DATA,
1456                              .cache_size = 1 * MiB,
1457                              .ways = 8,
1458                              .line_size = 64,
1459                              .tlb_entries = UNDEF,
1460                              .partitioning = 0};
1461    case 0xA0:
1462      return (CacheLevelInfo){.level = UNDEF,
1463                              .cache_type = CPU_FEATURE_CACHE_DTLB,
1464                              .cache_size = 4 * KiB,
1465                              .ways = 0xFF,
1466                              .line_size = UNDEF,
1467                              .tlb_entries = 32,
1468                              .partitioning = 0};
1469    case 0xB0:
1470      return (CacheLevelInfo){.level = UNDEF,
1471                              .cache_type = CPU_FEATURE_CACHE_TLB,
1472                              .cache_size = 4 * KiB,
1473                              .ways = 4,
1474                              .line_size = UNDEF,
1475                              .tlb_entries = 128,
1476                              .partitioning = 0};
1477    case 0xB1:
1478      return (CacheLevelInfo){.level = UNDEF,
1479                              .cache_type = CPU_FEATURE_CACHE_TLB,
1480                              .cache_size = 2 * MiB,
1481                              .ways = 4,
1482                              .line_size = UNDEF,
1483                              .tlb_entries = 8,
1484                              .partitioning = 0};
1485    case 0xB2:
1486      return (CacheLevelInfo){.level = UNDEF,
1487                              .cache_type = CPU_FEATURE_CACHE_TLB,
1488                              .cache_size = 4 * KiB,
1489                              .ways = 4,
1490                              .line_size = UNDEF,
1491                              .tlb_entries = 64,
1492                              .partitioning = 0};
1493    case 0xB3:
1494      return (CacheLevelInfo){.level = UNDEF,
1495                              .cache_type = CPU_FEATURE_CACHE_TLB,
1496                              .cache_size = 4 * KiB,
1497                              .ways = 4,
1498                              .line_size = UNDEF,
1499                              .tlb_entries = 128,
1500                              .partitioning = 0};
1501    case 0xB4:
1502      return (CacheLevelInfo){.level = UNDEF,
1503                              .cache_type = CPU_FEATURE_CACHE_TLB,
1504                              .cache_size = 4 * KiB,
1505                              .ways = 4,
1506                              .line_size = UNDEF,
1507                              .tlb_entries = 256,
1508                              .partitioning = 0};
1509    case 0xB5:
1510      return (CacheLevelInfo){.level = UNDEF,
1511                              .cache_type = CPU_FEATURE_CACHE_TLB,
1512                              .cache_size = 4 * KiB,
1513                              .ways = 8,
1514                              .line_size = UNDEF,
1515                              .tlb_entries = 64,
1516                              .partitioning = 0};
1517    case 0xB6:
1518      return (CacheLevelInfo){.level = UNDEF,
1519                              .cache_type = CPU_FEATURE_CACHE_TLB,
1520                              .cache_size = 4 * KiB,
1521                              .ways = 8,
1522                              .line_size = UNDEF,
1523                              .tlb_entries = 128,
1524                              .partitioning = 0};
1525    case 0xBA:
1526      return (CacheLevelInfo){.level = UNDEF,
1527                              .cache_type = CPU_FEATURE_CACHE_TLB,
1528                              .cache_size = 4 * KiB,
1529                              .ways = 4,
1530                              .line_size = UNDEF,
1531                              .tlb_entries = 64,
1532                              .partitioning = 0};
1533    case 0xC0:
1534      return (CacheLevelInfo){.level = UNDEF,
1535                              .cache_type = CPU_FEATURE_CACHE_TLB,
1536                              .cache_size = 4 * KiB,
1537                              .ways = 4,
1538                              .line_size = UNDEF,
1539                              .tlb_entries = 8,
1540                              .partitioning = 0};
1541    case 0xC1:
1542      return (CacheLevelInfo){.level = UNDEF,
1543                              .cache_type = CPU_FEATURE_CACHE_STLB,
1544                              .cache_size = 4 * KiB,
1545                              .ways = 8,
1546                              .line_size = UNDEF,
1547                              .tlb_entries = 1024,
1548                              .partitioning = 0};
1549    case 0xC2:
1550      return (CacheLevelInfo){.level = UNDEF,
1551                              .cache_type = CPU_FEATURE_CACHE_DTLB,
1552                              .cache_size = 4 * KiB,
1553                              .ways = 4,
1554                              .line_size = UNDEF,
1555                              .tlb_entries = 16,
1556                              .partitioning = 0};
1557    case 0xC3:
1558      return (CacheLevelInfo){.level = UNDEF,
1559                              .cache_type = CPU_FEATURE_CACHE_STLB,
1560                              .cache_size = 4 * KiB,
1561                              .ways = 6,
1562                              .line_size = UNDEF,
1563                              .tlb_entries = 1536,
1564                              .partitioning = 0};
1565    case 0xCA:
1566      return (CacheLevelInfo){.level = UNDEF,
1567                              .cache_type = CPU_FEATURE_CACHE_STLB,
1568                              .cache_size = 4 * KiB,
1569                              .ways = 4,
1570                              .line_size = UNDEF,
1571                              .tlb_entries = 512,
1572                              .partitioning = 0};
1573    case 0xD0:
1574      return (CacheLevelInfo){.level = 3,
1575                              .cache_type = CPU_FEATURE_CACHE_DATA,
1576                              .cache_size = 512 * KiB,
1577                              .ways = 4,
1578                              .line_size = 64,
1579                              .tlb_entries = UNDEF,
1580                              .partitioning = 0};
1581    case 0xD1:
1582      return (CacheLevelInfo){.level = 3,
1583                              .cache_type = CPU_FEATURE_CACHE_DATA,
1584                              .cache_size = 1 * MiB,
1585                              .ways = 4,
1586                              .line_size = 64,
1587                              .tlb_entries = UNDEF,
1588                              .partitioning = 0};
1589    case 0xD2:
1590      return (CacheLevelInfo){.level = 3,
1591                              .cache_type = CPU_FEATURE_CACHE_DATA,
1592                              .cache_size = 2 * MiB,
1593                              .ways = 4,
1594                              .line_size = 64,
1595                              .tlb_entries = UNDEF,
1596                              .partitioning = 0};
1597    case 0xD6:
1598      return (CacheLevelInfo){.level = 3,
1599                              .cache_type = CPU_FEATURE_CACHE_DATA,
1600                              .cache_size = 1 * MiB,
1601                              .ways = 8,
1602                              .line_size = 64,
1603                              .tlb_entries = UNDEF,
1604                              .partitioning = 0};
1605    case 0xD7:
1606      return (CacheLevelInfo){.level = 3,
1607                              .cache_type = CPU_FEATURE_CACHE_DATA,
1608                              .cache_size = 2 * MiB,
1609                              .ways = 8,
1610                              .line_size = 64,
1611                              .tlb_entries = UNDEF,
1612                              .partitioning = 0};
1613    case 0xD8:
1614      return (CacheLevelInfo){.level = 3,
1615                              .cache_type = CPU_FEATURE_CACHE_DATA,
1616                              .cache_size = 4 * MiB,
1617                              .ways = 8,
1618                              .line_size = 64,
1619                              .tlb_entries = UNDEF,
1620                              .partitioning = 0};
1621    case 0xDC:
1622      return (CacheLevelInfo){.level = 3,
1623                              .cache_type = CPU_FEATURE_CACHE_DATA,
1624                              .cache_size = 1 * 1536 * KiB,
1625                              .ways = 12,
1626                              .line_size = 64,
1627                              .tlb_entries = UNDEF,
1628                              .partitioning = 0};
1629    case 0xDD:
1630      return (CacheLevelInfo){.level = 3,
1631                              .cache_type = CPU_FEATURE_CACHE_DATA,
1632                              .cache_size = 3 * MiB,
1633                              .ways = 12,
1634                              .line_size = 64,
1635                              .tlb_entries = UNDEF,
1636                              .partitioning = 0};
1637    case 0xDE:
1638      return (CacheLevelInfo){.level = 3,
1639                              .cache_type = CPU_FEATURE_CACHE_DATA,
1640                              .cache_size = 6 * MiB,
1641                              .ways = 12,
1642                              .line_size = 64,
1643                              .tlb_entries = UNDEF,
1644                              .partitioning = 0};
1645    case 0xE2:
1646      return (CacheLevelInfo){.level = 3,
1647                              .cache_type = CPU_FEATURE_CACHE_DATA,
1648                              .cache_size = 2 * MiB,
1649                              .ways = 16,
1650                              .line_size = 64,
1651                              .tlb_entries = UNDEF,
1652                              .partitioning = 0};
1653    case 0xE3:
1654      return (CacheLevelInfo){.level = 3,
1655                              .cache_type = CPU_FEATURE_CACHE_DATA,
1656                              .cache_size = 4 * MiB,
1657                              .ways = 16,
1658                              .line_size = 64,
1659                              .tlb_entries = UNDEF,
1660                              .partitioning = 0};
1661    case 0xE4:
1662      return (CacheLevelInfo){.level = 3,
1663                              .cache_type = CPU_FEATURE_CACHE_DATA,
1664                              .cache_size = 8 * MiB,
1665                              .ways = 16,
1666                              .line_size = 64,
1667                              .tlb_entries = UNDEF,
1668                              .partitioning = 0};
1669    case 0xEA:
1670      return (CacheLevelInfo){.level = 3,
1671                              .cache_type = CPU_FEATURE_CACHE_DATA,
1672                              .cache_size = 12 * MiB,
1673                              .ways = 24,
1674                              .line_size = 64,
1675                              .tlb_entries = UNDEF,
1676                              .partitioning = 0};
1677    case 0xEB:
1678      return (CacheLevelInfo){.level = 3,
1679                              .cache_type = CPU_FEATURE_CACHE_DATA,
1680                              .cache_size = 18 * MiB,
1681                              .ways = 24,
1682                              .line_size = 64,
1683                              .tlb_entries = UNDEF,
1684                              .partitioning = 0};
1685    case 0xEC:
1686      return (CacheLevelInfo){.level = 3,
1687                              .cache_type = CPU_FEATURE_CACHE_DATA,
1688                              .cache_size = 24 * MiB,
1689                              .ways = 24,
1690                              .line_size = 64,
1691                              .tlb_entries = UNDEF,
1692                              .partitioning = 0};
1693    case 0xF0:
1694      return (CacheLevelInfo){.level = UNDEF,
1695                              .cache_type = CPU_FEATURE_CACHE_PREFETCH,
1696                              .cache_size = 64 * KiB,
1697                              .ways = UNDEF,
1698                              .line_size = UNDEF,
1699                              .tlb_entries = UNDEF,
1700                              .partitioning = 0};
1701    case 0xF1:
1702      return (CacheLevelInfo){.level = UNDEF,
1703                              .cache_type = CPU_FEATURE_CACHE_PREFETCH,
1704                              .cache_size = 128 * KiB,
1705                              .ways = UNDEF,
1706                              .line_size = UNDEF,
1707                              .tlb_entries = UNDEF,
1708                              .partitioning = 0};
1709    case 0xFF:
1710      return (CacheLevelInfo){.level = UNDEF,
1711                              .cache_type = CPU_FEATURE_CACHE_NULL,
1712                              .cache_size = UNDEF,
1713                              .ways = UNDEF,
1714                              .line_size = UNDEF,
1715                              .tlb_entries = UNDEF,
1716                              .partitioning = 0};
1717    default:
1718      return kEmptyCacheLevelInfo;
1719  }
1720}
1721
1722// From https://www.felixcloutier.com/x86/cpuid#tbl-3-12
1723static void ParseLeaf2(const Leaves* leaves, CacheInfo* info) {
1724  Leaf leaf = leaves->leaf_2;
1725  // The least-significant byte in register EAX (register AL) will always return
1726  // 01H. Software should ignore this value and not interpret it as an
1727  // informational descriptor.
1728  leaf.eax &= 0xFFFFFF00;  // Zeroing out AL. 0 is the empty descriptor.
1729  // The most significant bit (bit 31) of each register indicates whether the
1730  // register contains valid information (set to 0) or is reserved (set to 1).
1731  if (IsBitSet(leaf.eax, 31)) leaf.eax = 0;
1732  if (IsBitSet(leaf.ebx, 31)) leaf.ebx = 0;
1733  if (IsBitSet(leaf.ecx, 31)) leaf.ecx = 0;
1734  if (IsBitSet(leaf.edx, 31)) leaf.edx = 0;
1735
1736  uint8_t data[16];
1737#if __STDC_VERSION__ >= 201112L
1738  _Static_assert(sizeof(Leaf) == sizeof(data), "Leaf must be 16 bytes");
1739#endif
1740  copy((char*)(data), (const char*)(&leaf), sizeof(data));
1741  for (size_t i = 0; i < sizeof(data); ++i) {
1742    const uint8_t descriptor = data[i];
1743    if (descriptor == 0) continue;
1744    info->levels[info->size] = GetCacheLevelInfo(descriptor);
1745    info->size++;
1746  }
1747}
1748
// All-zero `CacheInfo`: an object with static storage duration and no
// initializer is zero-initialized. Used as the starting value whenever a
// cache description is built from scratch.
static const CacheInfo kEmptyCacheInfo;
1750
1751// For newer Intel CPUs uses "CPUID, eax=0x00000004".
1752// https://www.felixcloutier.com/x86/cpuid#input-eax-=-04h--returns-deterministic-cache-parameters-for-each-level
1753// For newer AMD CPUs uses "CPUID, eax=0x8000001D"
1754static void ParseCacheInfo(const int max_cpuid_leaf, uint32_t leaf_id,
1755                           CacheInfo* old_info) {
1756  CacheInfo info = kEmptyCacheInfo;
1757  for (int index = 0; info.size < CPU_FEATURES_MAX_CACHE_LEVEL; ++index) {
1758    const Leaf leaf = SafeCpuIdEx(max_cpuid_leaf, leaf_id, index);
1759    int cache_type_field = ExtractBitRange(leaf.eax, 4, 0);
1760    CacheType cache_type;
1761    if (cache_type_field == 1)
1762      cache_type = CPU_FEATURE_CACHE_DATA;
1763    else if (cache_type_field == 2)
1764      cache_type = CPU_FEATURE_CACHE_INSTRUCTION;
1765    else if (cache_type_field == 3)
1766      cache_type = CPU_FEATURE_CACHE_UNIFIED;
1767    else
1768      // Intel Processor Identification and the CPUID Instruction Application
1769      // Note 485 page 37 Table 5-10. Deterministic Cache Parameters.
1770      // We skip cache parsing in case null of cache type or cache type in the
1771      // range of 4-31 according to documentation.
1772      break;
1773    int level = ExtractBitRange(leaf.eax, 7, 5);
1774    int line_size = ExtractBitRange(leaf.ebx, 11, 0) + 1;
1775    int partitioning = ExtractBitRange(leaf.ebx, 21, 12) + 1;
1776    int ways = ExtractBitRange(leaf.ebx, 31, 22) + 1;
1777    int tlb_entries = leaf.ecx + 1;
1778    int cache_size = ways * partitioning * line_size * tlb_entries;
1779    info.levels[info.size] = (CacheLevelInfo){.level = level,
1780                                              .cache_type = cache_type,
1781                                              .cache_size = cache_size,
1782                                              .ways = ways,
1783                                              .line_size = line_size,
1784                                              .tlb_entries = tlb_entries,
1785                                              .partitioning = partitioning};
1786    ++info.size;
1787  }
1788  // Override CacheInfo if we successfully extracted Deterministic Cache
1789  // Parameters.
1790  if (info.size > 0) *old_info = info;
1791}
1792
// Describes where one cache's parameters live in the legacy AMD CPUID leaves
// (0x80000005 / 0x80000006); filled in by ParseCacheInfoLegacyAMD.
typedef struct {
  // Cache level this entry describes (1, 2 or 3).
  int level;
  // Raw ECX or EDX register value holding the packed size, associativity and
  // line-size fields for this cache.
  // NOTE(review): the register is stored in an `int` but later passed as
  // `uint32_t`; presumably the round-trip is lossless on supported compilers.
  int cache_id;
  // Kind of cache (data, instruction or unified).
  CacheType cache_type;
} CacheLevelInfoLegacyAMD;
1798
// Returns the associativity (number of ways) of a cache reported by the
// legacy AMD leaves. `cache_id` is the raw register value for that cache and
// `cache_level` selects the encoding. Returns -1 for a reserved L2/L3
// encoding.
static int GetWaysLegacyAMD(int cache_level, const uint32_t cache_id) {
  // https://www.amd.com/system/files/TechDocs/25481.pdf page 23
  // CPUID.8000_0005_ECX[23:16] L1 data cache associativity.
  // CPUID.8000_0005_EDX[23:16] L1 instruction cache associativity.
  // The L1 field is returned as-is.
  if (cache_level == 1) {
    return ExtractBitRange(cache_id, 23, 16);
  }

  // https://www.amd.com/system/files/TechDocs/25481.pdf page 24
  // See Table 4: L2/L3 Cache and TLB Associativity Field Definition.
  // CPUID.8000_0006_ECX[15:12] L2 cache associativity.
  // CPUID.8000_0006_EDX[15:12] L3 cache associativity.
  // Indexed by the 4-bit encoding; -1 marks reserved encodings.
  static const int kWaysByEncoding[] = {
      [0x0] = 0,  [0x1] = 1,  [0x2] = 2,  [0x3] = -1,
      [0x4] = 4,  [0x5] = -1, [0x6] = 8,  [0x7] = -1,
      [0x8] = 16, [0x9] = -1, [0xA] = 32, [0xB] = 48,
      [0xC] = 64, [0xD] = 96, [0xE] = 128, [0xF] = 255};
  const int table_len =
      (int)(sizeof(kWaysByEncoding) / sizeof(kWaysByEncoding[0]));

  const int encoding = ExtractBitRange(cache_id, 15, 12);
  if (encoding < 0 || encoding >= table_len) {
    return -1;  // Reserved
  }
  return kWaysByEncoding[encoding];
}
1837
// Returns the size, in KB, of a cache reported by the legacy AMD leaves.
// `cache_id` is the raw register value for that cache and `cache_level`
// selects which bit field holds the size. Returns 0 for any level other than
// 1, 2 or 3.
static int GetCacheSizeLegacyAMD(int cache_level, const uint32_t cache_id) {
  if (cache_level == 1) {
    // https://www.amd.com/system/files/TechDocs/25481.pdf page 23
    // CPUID.8000_0005_ECX[31:24] L1 data cache size in KB.
    // CPUID.8000_0005_EDX[31:24] L1 instruction cache size KB.
    return ExtractBitRange(cache_id, 31, 24);
  }
  if (cache_level == 2) {
    // https://www.amd.com/system/files/TechDocs/25481.pdf page 25
    // CPUID.8000_0006_ECX[31:16] L2 cache size in KB.
    return ExtractBitRange(cache_id, 31, 16);
  }
  if (cache_level == 3) {
    // https://www.amd.com/system/files/TechDocs/25481.pdf page 25
    // CPUID.8000_0006_EDX[31:18] L3 cache size.
    // The field counts 512KB units and gives a lower bound:
    // (L3Size[31:18] * 512KB) <= L3 cache size < ((L3Size[31:18]+1) * 512KB).
    return ExtractBitRange(cache_id, 31, 18) * 512;
  }
  return 0;
}
1859
// Number of caches the legacy AMD leaves can describe: L1 data,
// L1 instruction, L2 and L3 (see ParseCacheInfoLegacyAMD).
#define LEGACY_AMD_MAX_CACHE_LEVEL 4
1861
1862// https://www.amd.com/system/files/TechDocs/25481.pdf
1863// CPUID Fn8000_0005_E[A,B,C,D]X, Fn8000_0006_E[A,B,C,D]X - TLB and Cache info
1864static void ParseCacheInfoLegacyAMD(const uint32_t max_ext, CacheInfo* info) {
1865  const Leaf cache_tlb_leaf1 = SafeCpuIdEx(max_ext, 0x80000005, 0);
1866  const Leaf cache_tlb_leaf2 = SafeCpuIdEx(max_ext, 0x80000006, 0);
1867
1868  const CacheLevelInfoLegacyAMD legacy_cache_info[LEGACY_AMD_MAX_CACHE_LEVEL] =
1869      {(CacheLevelInfoLegacyAMD){.cache_id = cache_tlb_leaf1.ecx,
1870                                 .cache_type = CPU_FEATURE_CACHE_DATA,
1871                                 .level = 1},
1872       (CacheLevelInfoLegacyAMD){.cache_id = cache_tlb_leaf1.edx,
1873                                 .cache_type = CPU_FEATURE_CACHE_INSTRUCTION,
1874                                 .level = 1},
1875       (CacheLevelInfoLegacyAMD){.cache_id = cache_tlb_leaf2.ecx,
1876                                 .cache_type = CPU_FEATURE_CACHE_UNIFIED,
1877                                 .level = 2},
1878       (CacheLevelInfoLegacyAMD){.cache_id = cache_tlb_leaf2.edx,
1879                                 .cache_type = CPU_FEATURE_CACHE_UNIFIED,
1880                                 .level = 3}};
1881
1882  const int KiB = 1024;
1883  const int UNDEF = -1;
1884  for (int i = 0; i < LEGACY_AMD_MAX_CACHE_LEVEL; ++i) {
1885    const int level = legacy_cache_info[i].level;
1886    const int cache_id = legacy_cache_info[i].cache_id;
1887    const CacheType cache_type = legacy_cache_info[i].cache_type;
1888    const int cache_size = GetCacheSizeLegacyAMD(level, cache_id);
1889    if (cache_size == 0) break;
1890    info->levels[i] =
1891        (CacheLevelInfo){.level = level,
1892                         .cache_type = cache_type,
1893                         .cache_size = cache_size * KiB,
1894                         .ways = GetWaysLegacyAMD(level, cache_id),
1895                         .line_size = ExtractBitRange(cache_id, 7, 0),
1896                         .tlb_entries = UNDEF,
1897                         .partitioning = UNDEF};
1898    ++info->size;
1899  }
1900}
1901
1902CacheInfo GetX86CacheInfo(void) {
1903  CacheInfo info = kEmptyCacheInfo;
1904  const Leaves leaves = ReadLeaves();
1905  if (IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_GENUINE_INTEL) ||
1906      IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_CENTAUR_HAULS) ||
1907      IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_SHANGHAI)) {
1908    ParseLeaf2(&leaves, &info);
1909    ParseCacheInfo(leaves.max_cpuid_leaf, 4, &info);
1910  } else if (IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_AUTHENTIC_AMD) ||
1911             IsVendor(leaves.leaf_0, CPU_FEATURES_VENDOR_HYGON_GENUINE)) {
1912    // If CPUID Fn8000_0001_ECX[TopologyExtensions]==0
1913    // then CPUID Fn8000_0001_E[D,C,B,A]X is reserved.
1914    // https://www.amd.com/system/files/TechDocs/25481.pdf
1915    if (IsBitSet(leaves.leaf_80000001.ecx, 22)) {
1916      ParseCacheInfo(leaves.max_cpuid_leaf_ext, 0x8000001D, &info);
1917    } else {
1918      ParseCacheInfoLegacyAMD(leaves.max_cpuid_leaf_ext, &info);
1919    }
1920  }
1921  return info;
1922}
1923
////////////////////////////////////////////////////////////////////////////////
// Definitions for introspection.
////////////////////////////////////////////////////////////////////////////////
// X-macro table with one LINE(...) entry per x86 feature. The first argument
// is the feature's enum constant and the second a lowercase identifier
// (presumably the matching features-struct field -- confirm against the
// header). The three trailing arguments are left empty for every entry here.
// The table and the two prefix macros below are consumed by
// "define_introspection.inl", which is not visible from this file; presumably
// it defines LINE and generates the feature lookup helpers -- confirm there.
#define INTROSPECTION_TABLE                                \
  LINE(X86_FPU, fpu, , , )                                 \
  LINE(X86_TSC, tsc, , , )                                 \
  LINE(X86_CX8, cx8, , , )                                 \
  LINE(X86_CLFSH, clfsh, , , )                             \
  LINE(X86_MMX, mmx, , , )                                 \
  LINE(X86_AES, aes, , , )                                 \
  LINE(X86_ERMS, erms, , , )                               \
  LINE(X86_F16C, f16c, , , )                               \
  LINE(X86_FMA4, fma4, , , )                               \
  LINE(X86_FMA3, fma3, , , )                               \
  LINE(X86_VAES, vaes, , , )                               \
  LINE(X86_VPCLMULQDQ, vpclmulqdq, , , )                   \
  LINE(X86_BMI1, bmi1, , , )                               \
  LINE(X86_HLE, hle, , , )                                 \
  LINE(X86_BMI2, bmi2, , , )                               \
  LINE(X86_RTM, rtm, , , )                                 \
  LINE(X86_RDSEED, rdseed, , , )                           \
  LINE(X86_CLFLUSHOPT, clflushopt, , , )                   \
  LINE(X86_CLWB, clwb, , , )                               \
  LINE(X86_SSE, sse, , , )                                 \
  LINE(X86_SSE2, sse2, , , )                               \
  LINE(X86_SSE3, sse3, , , )                               \
  LINE(X86_SSSE3, ssse3, , , )                             \
  LINE(X86_SSE4_1, sse4_1, , , )                           \
  LINE(X86_SSE4_2, sse4_2, , , )                           \
  LINE(X86_SSE4A, sse4a, , , )                             \
  LINE(X86_AVX, avx, , , )                                 \
  LINE(X86_AVX_VNNI, avx_vnni, , , )                       \
  LINE(X86_AVX2, avx2, , , )                               \
  LINE(X86_AVX512F, avx512f, , , )                         \
  LINE(X86_AVX512CD, avx512cd, , , )                       \
  LINE(X86_AVX512ER, avx512er, , , )                       \
  LINE(X86_AVX512PF, avx512pf, , , )                       \
  LINE(X86_AVX512BW, avx512bw, , , )                       \
  LINE(X86_AVX512DQ, avx512dq, , , )                       \
  LINE(X86_AVX512VL, avx512vl, , , )                       \
  LINE(X86_AVX512IFMA, avx512ifma, , , )                   \
  LINE(X86_AVX512VBMI, avx512vbmi, , , )                   \
  LINE(X86_AVX512VBMI2, avx512vbmi2, , , )                 \
  LINE(X86_AVX512VNNI, avx512vnni, , , )                   \
  LINE(X86_AVX512BITALG, avx512bitalg, , , )               \
  LINE(X86_AVX512VPOPCNTDQ, avx512vpopcntdq, , , )         \
  LINE(X86_AVX512_4VNNIW, avx512_4vnniw, , , )             \
  LINE(X86_AVX512_4VBMI2, avx512_4vbmi2, , , )             \
  LINE(X86_AVX512_SECOND_FMA, avx512_second_fma, , , )     \
  LINE(X86_AVX512_4FMAPS, avx512_4fmaps, , , )             \
  LINE(X86_AVX512_BF16, avx512_bf16, , , )                 \
  LINE(X86_AVX512_VP2INTERSECT, avx512_vp2intersect, , , ) \
  LINE(X86_AVX512_FP16, avx512_fp16, , , )                 \
  LINE(X86_AMX_BF16, amx_bf16, , , )                       \
  LINE(X86_AMX_TILE, amx_tile, , , )                       \
  LINE(X86_AMX_INT8, amx_int8, , , )                       \
  LINE(X86_AMX_FP16, amx_fp16, , , )                       \
  LINE(X86_PCLMULQDQ, pclmulqdq, , , )                     \
  LINE(X86_SMX, smx, , , )                                 \
  LINE(X86_SGX, sgx, , , )                                 \
  LINE(X86_CX16, cx16, , , )                               \
  LINE(X86_SHA, sha, , , )                                 \
  LINE(X86_POPCNT, popcnt, , , )                           \
  LINE(X86_MOVBE, movbe, , , )                             \
  LINE(X86_RDRND, rdrnd, , , )                             \
  LINE(X86_DCA, dca, , , )                                 \
  LINE(X86_SS, ss, , , )                                   \
  LINE(X86_ADX, adx, , , )                                 \
  LINE(X86_LZCNT, lzcnt, , , )                             \
  LINE(X86_GFNI, gfni, , , )                               \
  LINE(X86_MOVDIRI, movdiri, , , )                         \
  LINE(X86_MOVDIR64B, movdir64b, , , )                     \
  LINE(X86_FS_REP_MOV, fs_rep_mov, , , )                   \
  LINE(X86_FZ_REP_MOVSB, fz_rep_movsb, , , )               \
  LINE(X86_FS_REP_STOSB, fs_rep_stosb, , , )               \
  LINE(X86_FS_REP_CMPSB_SCASB, fs_rep_cmpsb_scasb, , , )   \
  LINE(X86_LAM, lam, , , )                                 \
  LINE(X86_UAI, uai, , , )
// Prefixes handed to the include below together with the table.
#define INTROSPECTION_PREFIX X86
#define INTROSPECTION_ENUM_PREFIX X86
#include "define_introspection.inl"
2005
// X-macro list of the x86 microarchitecture enumerators.
// GetX86MicroarchitectureName defines LINE(ENUM) so that each entry becomes a
// `[ENUM] = STRINGIZE(ENUM)` slot of its name table; an enumerator must
// therefore be listed here to get a printable name.
#define X86_MICROARCHITECTURE_NAMES \
  LINE(X86_UNKNOWN)                 \
  LINE(ZHAOXIN_ZHANGJIANG)          \
  LINE(ZHAOXIN_WUDAOKOU)            \
  LINE(ZHAOXIN_LUJIAZUI)            \
  LINE(ZHAOXIN_YONGFENG)            \
  LINE(INTEL_80486)                 \
  LINE(INTEL_P5)                    \
  LINE(INTEL_LAKEMONT)              \
  LINE(INTEL_CORE)                  \
  LINE(INTEL_PNR)                   \
  LINE(INTEL_NHM)                   \
  LINE(INTEL_ATOM_BNL)              \
  LINE(INTEL_WSM)                   \
  LINE(INTEL_SNB)                   \
  LINE(INTEL_IVB)                   \
  LINE(INTEL_ATOM_SMT)              \
  LINE(INTEL_HSW)                   \
  LINE(INTEL_BDW)                   \
  LINE(INTEL_SKL)                   \
  LINE(INTEL_CCL)                   \
  LINE(INTEL_ATOM_GMT)              \
  LINE(INTEL_ATOM_GMT_PLUS)         \
  LINE(INTEL_ATOM_TMT)              \
  LINE(INTEL_KBL)                   \
  LINE(INTEL_CFL)                   \
  LINE(INTEL_WHL)                   \
  LINE(INTEL_CML)                   \
  LINE(INTEL_CNL)                   \
  LINE(INTEL_ICL)                   \
  LINE(INTEL_TGL)                   \
  LINE(INTEL_SPR)                   \
  LINE(INTEL_ADL)                   \
  LINE(INTEL_RCL)                   \
  LINE(INTEL_RPL)                   \
  LINE(INTEL_KNIGHTS_M)             \
  LINE(INTEL_KNIGHTS_L)             \
  LINE(INTEL_KNIGHTS_F)             \
  LINE(INTEL_KNIGHTS_C)             \
  LINE(INTEL_NETBURST)              \
  LINE(AMD_HAMMER)                  \
  LINE(AMD_K10)                     \
  LINE(AMD_K11)                     \
  LINE(AMD_K12)                     \
  LINE(AMD_BOBCAT)                  \
  LINE(AMD_PILEDRIVER)              \
  LINE(AMD_STREAMROLLER)            \
  LINE(AMD_EXCAVATOR)               \
  LINE(AMD_BULLDOZER)               \
  LINE(AMD_JAGUAR)                  \
  LINE(AMD_PUMA)                    \
  LINE(AMD_ZEN)                     \
  LINE(AMD_ZEN_PLUS)                \
  LINE(AMD_ZEN2)                    \
  LINE(AMD_ZEN3)                    \
  LINE(AMD_ZEN4)
2062
2063const char* GetX86MicroarchitectureName(X86Microarchitecture value) {
2064#define LINE(ENUM) [ENUM] = STRINGIZE(ENUM),
2065  static const char* kMicroarchitectureNames[] = {X86_MICROARCHITECTURE_NAMES};
2066#undef LINE
2067  if (value >= X86_MICROARCHITECTURE_LAST_) return "unknown microarchitecture";
2068  return kMicroarchitectureNames[value];
2069}
2070