#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <alloca.h>

#include <errno.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <mach/machine.h>

#include <cpuinfo.h>
#include <mach/api.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
16*2b54f0dbSXin Li
/*
 * Fallback definitions for CPUFAMILY_ARM_* identifiers that older SDK headers
 * may not provide. Each one is defined only when the SDK has not already
 * defined it; the values are compared against the "hw.cpufamily" sysctl below.
 */
#if !defined(CPUFAMILY_ARM_VORTEX_TEMPEST)
	#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F
#endif
#if !defined(CPUFAMILY_ARM_LIGHTNING_THUNDER)
	#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
#endif
#if !defined(CPUFAMILY_ARM_FIRESTORM_ICESTORM)
	#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
#endif
#if !defined(CPUFAMILY_ARM_AVALANCHE_BLIZZARD)
	#define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D
#endif
30*2b54f0dbSXin Li
31*2b54f0dbSXin Li struct cpuinfo_arm_isa cpuinfo_isa = {
32*2b54f0dbSXin Li .aes = true,
33*2b54f0dbSXin Li .sha1 = true,
34*2b54f0dbSXin Li .sha2 = true,
35*2b54f0dbSXin Li .pmull = true,
36*2b54f0dbSXin Li .crc32 = true,
37*2b54f0dbSXin Li };
38*2b54f0dbSXin Li
get_sys_info(int type_specifier,const char * name)39*2b54f0dbSXin Li static uint32_t get_sys_info(int type_specifier, const char* name) {
40*2b54f0dbSXin Li size_t size = 0;
41*2b54f0dbSXin Li uint32_t result = 0;
42*2b54f0dbSXin Li int mib[2] = { CTL_HW, type_specifier };
43*2b54f0dbSXin Li if (sysctl(mib, 2, NULL, &size, NULL, 0) != 0) {
44*2b54f0dbSXin Li cpuinfo_log_info("sysctl(\"%s\") failed: %s", name, strerror(errno));
45*2b54f0dbSXin Li } else if (size == sizeof(uint32_t)) {
46*2b54f0dbSXin Li sysctl(mib, 2, &result, &size, NULL, 0);
47*2b54f0dbSXin Li cpuinfo_log_debug("%s: %"PRIu32 ", size = %lu", name, result, size);
48*2b54f0dbSXin Li } else {
49*2b54f0dbSXin Li cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", name);
50*2b54f0dbSXin Li }
51*2b54f0dbSXin Li return result;
52*2b54f0dbSXin Li }
53*2b54f0dbSXin Li
/*
 * Read a 32-bit integer sysctl value by its string name.
 *
 * @param type_specifier  Full sysctl name (e.g. "hw.cpufamily").
 * @return The value reported by the kernel, or 0 if the query fails or the
 *         value is not exactly sizeof(uint32_t) bytes wide.
 */
static uint32_t get_sys_info_by_name(const char* type_specifier) {
	size_t size = 0;
	uint32_t result = 0;
	/* First call with a NULL buffer only asks the kernel for the value size */
	if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0) {
		cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
	} else if (size == sizeof(uint32_t)) {
		if (sysctlbyname(type_specifier, &result, &size, NULL, 0) != 0) {
			cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
			/* Do not return a partially written value */
			result = 0;
		} else {
			cpuinfo_log_debug("%s: %" PRIu32 ", size = %zu", type_specifier, result, size);
		}
	} else {
		cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", type_specifier);
	}
	return result;
}
67*2b54f0dbSXin Li
decode_uarch(uint32_t cpu_family,uint32_t core_index,uint32_t core_count)68*2b54f0dbSXin Li static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, uint32_t core_count) {
69*2b54f0dbSXin Li switch (cpu_family) {
70*2b54f0dbSXin Li case CPUFAMILY_ARM_CYCLONE:
71*2b54f0dbSXin Li return cpuinfo_uarch_cyclone;
72*2b54f0dbSXin Li case CPUFAMILY_ARM_TYPHOON:
73*2b54f0dbSXin Li return cpuinfo_uarch_typhoon;
74*2b54f0dbSXin Li case CPUFAMILY_ARM_TWISTER:
75*2b54f0dbSXin Li return cpuinfo_uarch_twister;
76*2b54f0dbSXin Li case CPUFAMILY_ARM_HURRICANE:
77*2b54f0dbSXin Li return cpuinfo_uarch_hurricane;
78*2b54f0dbSXin Li case CPUFAMILY_ARM_MONSOON_MISTRAL:
79*2b54f0dbSXin Li /* 2x Monsoon + 4x Mistral cores */
80*2b54f0dbSXin Li return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral;
81*2b54f0dbSXin Li case CPUFAMILY_ARM_VORTEX_TEMPEST:
82*2b54f0dbSXin Li /* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Cortex + 4x Tempest */
83*2b54f0dbSXin Li return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest;
84*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
85*2b54f0dbSXin Li /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */
86*2b54f0dbSXin Li return core_index + 4 < core_count ? cpuinfo_uarch_lightning : cpuinfo_uarch_thunder;
87*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
88*2b54f0dbSXin Li /* Hexa-core: 2x Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */
89*2b54f0dbSXin Li return core_index + 4 < core_count ? cpuinfo_uarch_firestorm : cpuinfo_uarch_icestorm;
90*2b54f0dbSXin Li case CPUFAMILY_ARM_AVALANCHE_BLIZZARD:
91*2b54f0dbSXin Li /* Hexa-core: 2x Avalanche + 4x Blizzard */
92*2b54f0dbSXin Li return core_index + 4 < core_count ? cpuinfo_uarch_avalanche : cpuinfo_uarch_blizzard;
93*2b54f0dbSXin Li default:
94*2b54f0dbSXin Li /* Use hw.cpusubtype for detection */
95*2b54f0dbSXin Li break;
96*2b54f0dbSXin Li }
97*2b54f0dbSXin Li
98*2b54f0dbSXin Li return cpuinfo_uarch_unknown;
99*2b54f0dbSXin Li }
100*2b54f0dbSXin Li
decode_package_name(char * package_name)101*2b54f0dbSXin Li static void decode_package_name(char* package_name) {
102*2b54f0dbSXin Li size_t size;
103*2b54f0dbSXin Li if (sysctlbyname("hw.machine", NULL, &size, NULL, 0) != 0) {
104*2b54f0dbSXin Li cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
105*2b54f0dbSXin Li return;
106*2b54f0dbSXin Li }
107*2b54f0dbSXin Li
108*2b54f0dbSXin Li char *machine_name = alloca(size);
109*2b54f0dbSXin Li if (sysctlbyname("hw.machine", machine_name, &size, NULL, 0) != 0) {
110*2b54f0dbSXin Li cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
111*2b54f0dbSXin Li return;
112*2b54f0dbSXin Li }
113*2b54f0dbSXin Li cpuinfo_log_debug("hw.machine: %s", machine_name);
114*2b54f0dbSXin Li
115*2b54f0dbSXin Li char name[10];
116*2b54f0dbSXin Li uint32_t major = 0, minor = 0;
117*2b54f0dbSXin Li if (sscanf(machine_name, "%9[^,0123456789]%"SCNu32",%"SCNu32, name, &major, &minor) != 3) {
118*2b54f0dbSXin Li cpuinfo_log_warning("parsing \"hw.machine\" failed: %s", strerror(errno));
119*2b54f0dbSXin Li return;
120*2b54f0dbSXin Li }
121*2b54f0dbSXin Li
122*2b54f0dbSXin Li uint32_t chip_model = 0;
123*2b54f0dbSXin Li char suffix = '\0';
124*2b54f0dbSXin Li if (strcmp(name, "iPhone") == 0) {
125*2b54f0dbSXin Li /*
126*2b54f0dbSXin Li * iPhone 4 and up are supported:
127*2b54f0dbSXin Li * - iPhone 4 [A4]: iPhone3,1, iPhone3,2, iPhone3,3
128*2b54f0dbSXin Li * - iPhone 4S [A5]: iPhone4,1
129*2b54f0dbSXin Li * - iPhone 5 [A6]: iPhone5,1, iPhone5,2
130*2b54f0dbSXin Li * - iPhone 5c [A6]: iPhone5,3, iPhone5,4
131*2b54f0dbSXin Li * - iPhone 5s [A7]: iPhone6,1, iPhone6,2
132*2b54f0dbSXin Li * - iPhone 6 [A8]: iPhone7,2
133*2b54f0dbSXin Li * - iPhone 6 Plus [A8]: iPhone7,1
134*2b54f0dbSXin Li * - iPhone 6s [A9]: iPhone8,1
135*2b54f0dbSXin Li * - iPhone 6s Plus [A9]: iPhone8,2
136*2b54f0dbSXin Li * - iPhone SE [A9]: iPhone8,4
137*2b54f0dbSXin Li * - iPhone 7 [A10]: iPhone9,1, iPhone9,3
138*2b54f0dbSXin Li * - iPhone 7 Plus [A10]: iPhone9,2, iPhone9,4
139*2b54f0dbSXin Li * - iPhone 8 [A11]: iPhone10,1, iPhone10,4
140*2b54f0dbSXin Li * - iPhone 8 Plus [A11]: iPhone10,2, iPhone10,5
141*2b54f0dbSXin Li * - iPhone X [A11]: iPhone10,3, iPhone10,6
142*2b54f0dbSXin Li * - iPhone XS [A12]: iPhone11,2,
143*2b54f0dbSXin Li * - iPhone XS Max [A12]: iPhone11,4, iPhone11,6
144*2b54f0dbSXin Li * - iPhone XR [A12]: iPhone11,8
145*2b54f0dbSXin Li */
146*2b54f0dbSXin Li chip_model = major + 1;
147*2b54f0dbSXin Li } else if (strcmp(name, "iPad") == 0) {
148*2b54f0dbSXin Li switch (major) {
149*2b54f0dbSXin Li /* iPad 2 and up are supported */
150*2b54f0dbSXin Li case 2:
151*2b54f0dbSXin Li /*
152*2b54f0dbSXin Li * iPad 2 [A5]: iPad2,1, iPad2,2, iPad2,3, iPad2,4
153*2b54f0dbSXin Li * iPad mini [A5]: iPad2,5, iPad2,6, iPad2,7
154*2b54f0dbSXin Li */
155*2b54f0dbSXin Li chip_model = major + 3;
156*2b54f0dbSXin Li break;
157*2b54f0dbSXin Li case 3:
158*2b54f0dbSXin Li /*
159*2b54f0dbSXin Li * iPad 3rd Gen [A5X]: iPad3,1, iPad3,2, iPad3,3
160*2b54f0dbSXin Li * iPad 4th Gen [A6X]: iPad3,4, iPad3,5, iPad3,6
161*2b54f0dbSXin Li */
162*2b54f0dbSXin Li chip_model = (minor <= 3) ? 5 : 6;
163*2b54f0dbSXin Li suffix = 'X';
164*2b54f0dbSXin Li break;
165*2b54f0dbSXin Li case 4:
166*2b54f0dbSXin Li /*
167*2b54f0dbSXin Li * iPad Air [A7]: iPad4,1, iPad4,2, iPad4,3
168*2b54f0dbSXin Li * iPad mini Retina [A7]: iPad4,4, iPad4,5, iPad4,6
169*2b54f0dbSXin Li * iPad mini 3 [A7]: iPad4,7, iPad4,8, iPad4,9
170*2b54f0dbSXin Li */
171*2b54f0dbSXin Li chip_model = major + 3;
172*2b54f0dbSXin Li break;
173*2b54f0dbSXin Li case 5:
174*2b54f0dbSXin Li /*
175*2b54f0dbSXin Li * iPad mini 4 [A8]: iPad5,1, iPad5,2
176*2b54f0dbSXin Li * iPad Air 2 [A8X]: iPad5,3, iPad5,4
177*2b54f0dbSXin Li */
178*2b54f0dbSXin Li chip_model = major + 3;
179*2b54f0dbSXin Li suffix = (minor <= 2) ? '\0' : 'X';
180*2b54f0dbSXin Li break;
181*2b54f0dbSXin Li case 6:
182*2b54f0dbSXin Li /*
183*2b54f0dbSXin Li * iPad Pro 9.7" [A9X]: iPad6,3, iPad6,4
184*2b54f0dbSXin Li * iPad Pro [A9X]: iPad6,7, iPad6,8
185*2b54f0dbSXin Li * iPad 5th Gen [A9]: iPad6,11, iPad6,12
186*2b54f0dbSXin Li */
187*2b54f0dbSXin Li chip_model = major + 3;
188*2b54f0dbSXin Li suffix = minor <= 8 ? 'X' : '\0';
189*2b54f0dbSXin Li break;
190*2b54f0dbSXin Li case 7:
191*2b54f0dbSXin Li /*
192*2b54f0dbSXin Li * iPad Pro 12.9" [A10X]: iPad7,1, iPad7,2
193*2b54f0dbSXin Li * iPad Pro 10.5" [A10X]: iPad7,3, iPad7,4
194*2b54f0dbSXin Li * iPad 6th Gen [A10]: iPad7,5, iPad7,6
195*2b54f0dbSXin Li */
196*2b54f0dbSXin Li chip_model = major + 3;
197*2b54f0dbSXin Li suffix = minor <= 4 ? 'X' : '\0';
198*2b54f0dbSXin Li break;
199*2b54f0dbSXin Li default:
200*2b54f0dbSXin Li cpuinfo_log_info("unknown iPad: %s", machine_name);
201*2b54f0dbSXin Li break;
202*2b54f0dbSXin Li }
203*2b54f0dbSXin Li } else if (strcmp(name, "iPod") == 0) {
204*2b54f0dbSXin Li switch (major) {
205*2b54f0dbSXin Li case 5:
206*2b54f0dbSXin Li chip_model = 5;
207*2b54f0dbSXin Li break;
208*2b54f0dbSXin Li /* iPod touch (5th Gen) [A5]: iPod5,1 */
209*2b54f0dbSXin Li case 7:
210*2b54f0dbSXin Li /* iPod touch (6th Gen, 2015) [A8]: iPod7,1 */
211*2b54f0dbSXin Li chip_model = 8;
212*2b54f0dbSXin Li break;
213*2b54f0dbSXin Li default:
214*2b54f0dbSXin Li cpuinfo_log_info("unknown iPod: %s", machine_name);
215*2b54f0dbSXin Li break;
216*2b54f0dbSXin Li }
217*2b54f0dbSXin Li } else {
218*2b54f0dbSXin Li cpuinfo_log_info("unknown device: %s", machine_name);
219*2b54f0dbSXin Li }
220*2b54f0dbSXin Li if (chip_model != 0) {
221*2b54f0dbSXin Li snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "Apple A%"PRIu32"%c", chip_model, suffix);
222*2b54f0dbSXin Li }
223*2b54f0dbSXin Li }
224*2b54f0dbSXin Li
cpuinfo_arm_mach_init(void)225*2b54f0dbSXin Li void cpuinfo_arm_mach_init(void) {
226*2b54f0dbSXin Li struct cpuinfo_processor* processors = NULL;
227*2b54f0dbSXin Li struct cpuinfo_core* cores = NULL;
228*2b54f0dbSXin Li struct cpuinfo_cluster* clusters = NULL;
229*2b54f0dbSXin Li struct cpuinfo_package* packages = NULL;
230*2b54f0dbSXin Li struct cpuinfo_uarch_info* uarchs = NULL;
231*2b54f0dbSXin Li struct cpuinfo_cache* l1i = NULL;
232*2b54f0dbSXin Li struct cpuinfo_cache* l1d = NULL;
233*2b54f0dbSXin Li struct cpuinfo_cache* l2 = NULL;
234*2b54f0dbSXin Li struct cpuinfo_cache* l3 = NULL;
235*2b54f0dbSXin Li
236*2b54f0dbSXin Li struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
237*2b54f0dbSXin Li processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
238*2b54f0dbSXin Li if (processors == NULL) {
239*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
240*2b54f0dbSXin Li mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads);
241*2b54f0dbSXin Li goto cleanup;
242*2b54f0dbSXin Li }
243*2b54f0dbSXin Li cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
244*2b54f0dbSXin Li if (cores == NULL) {
245*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
246*2b54f0dbSXin Li mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores);
247*2b54f0dbSXin Li goto cleanup;
248*2b54f0dbSXin Li }
249*2b54f0dbSXin Li packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
250*2b54f0dbSXin Li if (packages == NULL) {
251*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" packages",
252*2b54f0dbSXin Li mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages);
253*2b54f0dbSXin Li goto cleanup;
254*2b54f0dbSXin Li }
255*2b54f0dbSXin Li
256*2b54f0dbSXin Li const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
257*2b54f0dbSXin Li const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
258*2b54f0dbSXin Li const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
259*2b54f0dbSXin Li
260*2b54f0dbSXin Li for (uint32_t i = 0; i < mach_topology.packages; i++) {
261*2b54f0dbSXin Li packages[i] = (struct cpuinfo_package) {
262*2b54f0dbSXin Li .processor_start = i * threads_per_package,
263*2b54f0dbSXin Li .processor_count = threads_per_package,
264*2b54f0dbSXin Li .core_start = i * cores_per_package,
265*2b54f0dbSXin Li .core_count = cores_per_package,
266*2b54f0dbSXin Li };
267*2b54f0dbSXin Li decode_package_name(packages[i].name);
268*2b54f0dbSXin Li }
269*2b54f0dbSXin Li
270*2b54f0dbSXin Li
271*2b54f0dbSXin Li const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
272*2b54f0dbSXin Li
273*2b54f0dbSXin Li /*
274*2b54f0dbSXin Li * iOS 15 and macOS 12 added sysctls for ARM features, use them where possible.
275*2b54f0dbSXin Li * Otherwise, fallback to hardcoded set of CPUs with known support.
276*2b54f0dbSXin Li */
277*2b54f0dbSXin Li const uint32_t has_feat_lse = get_sys_info_by_name("hw.optional.arm.FEAT_LSE");
278*2b54f0dbSXin Li if (has_feat_lse != 0) {
279*2b54f0dbSXin Li cpuinfo_isa.atomics = true;
280*2b54f0dbSXin Li } else {
281*2b54f0dbSXin Li // Mandatory in ARMv8.1-A, list only cores released before iOS 15 / macOS 12
282*2b54f0dbSXin Li switch (cpu_family) {
283*2b54f0dbSXin Li case CPUFAMILY_ARM_MONSOON_MISTRAL:
284*2b54f0dbSXin Li case CPUFAMILY_ARM_VORTEX_TEMPEST:
285*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
286*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
287*2b54f0dbSXin Li cpuinfo_isa.atomics = true;
288*2b54f0dbSXin Li }
289*2b54f0dbSXin Li }
290*2b54f0dbSXin Li
291*2b54f0dbSXin Li const uint32_t has_feat_rdm = get_sys_info_by_name("hw.optional.arm.FEAT_RDM");
292*2b54f0dbSXin Li if (has_feat_rdm != 0) {
293*2b54f0dbSXin Li cpuinfo_isa.rdm = true;
294*2b54f0dbSXin Li } else {
295*2b54f0dbSXin Li // Optional in ARMv8.2-A (implemented in Apple cores),
296*2b54f0dbSXin Li // list only cores released before iOS 15 / macOS 12
297*2b54f0dbSXin Li switch (cpu_family) {
298*2b54f0dbSXin Li case CPUFAMILY_ARM_MONSOON_MISTRAL:
299*2b54f0dbSXin Li case CPUFAMILY_ARM_VORTEX_TEMPEST:
300*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
301*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
302*2b54f0dbSXin Li cpuinfo_isa.rdm = true;
303*2b54f0dbSXin Li }
304*2b54f0dbSXin Li }
305*2b54f0dbSXin Li
306*2b54f0dbSXin Li const uint32_t has_feat_fp16 = get_sys_info_by_name("hw.optional.arm.FEAT_FP16");
307*2b54f0dbSXin Li if (has_feat_fp16 != 0) {
308*2b54f0dbSXin Li cpuinfo_isa.fp16arith = true;
309*2b54f0dbSXin Li } else {
310*2b54f0dbSXin Li // Optional in ARMv8.2-A (implemented in Apple cores),
311*2b54f0dbSXin Li // list only cores released before iOS 15 / macOS 12
312*2b54f0dbSXin Li switch (cpu_family) {
313*2b54f0dbSXin Li case CPUFAMILY_ARM_MONSOON_MISTRAL:
314*2b54f0dbSXin Li case CPUFAMILY_ARM_VORTEX_TEMPEST:
315*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
316*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
317*2b54f0dbSXin Li cpuinfo_isa.fp16arith = true;
318*2b54f0dbSXin Li }
319*2b54f0dbSXin Li }
320*2b54f0dbSXin Li
321*2b54f0dbSXin Li const uint32_t has_feat_fhm = get_sys_info_by_name("hw.optional.arm.FEAT_FHM");
322*2b54f0dbSXin Li if (has_feat_fhm != 0) {
323*2b54f0dbSXin Li cpuinfo_isa.fhm = true;
324*2b54f0dbSXin Li } else {
325*2b54f0dbSXin Li // Prior to iOS 15, use 'hw.optional.armv8_2_fhm'
326*2b54f0dbSXin Li const uint32_t has_feat_fhm_legacy = get_sys_info_by_name("hw.optional.armv8_2_fhm");
327*2b54f0dbSXin Li if (has_feat_fhm_legacy != 0) {
328*2b54f0dbSXin Li cpuinfo_isa.fhm = true;
329*2b54f0dbSXin Li } else {
330*2b54f0dbSXin Li // Mandatory in ARMv8.4-A when FP16 arithmetics is implemented,
331*2b54f0dbSXin Li // list only cores released before iOS 15 / macOS 12
332*2b54f0dbSXin Li switch (cpu_family) {
333*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
334*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
335*2b54f0dbSXin Li cpuinfo_isa.fhm = true;
336*2b54f0dbSXin Li }
337*2b54f0dbSXin Li }
338*2b54f0dbSXin Li }
339*2b54f0dbSXin Li
340*2b54f0dbSXin Li const uint32_t has_feat_bf16 = get_sys_info_by_name("hw.optional.arm.FEAT_BF16");
341*2b54f0dbSXin Li if (has_feat_bf16 != 0) {
342*2b54f0dbSXin Li cpuinfo_isa.bf16 = true;
343*2b54f0dbSXin Li }
344*2b54f0dbSXin Li
345*2b54f0dbSXin Li const uint32_t has_feat_fcma = get_sys_info_by_name("hw.optional.arm.FEAT_FCMA");
346*2b54f0dbSXin Li if (has_feat_fcma != 0) {
347*2b54f0dbSXin Li cpuinfo_isa.fcma = true;
348*2b54f0dbSXin Li } else {
349*2b54f0dbSXin Li // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12
350*2b54f0dbSXin Li switch (cpu_family) {
351*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
352*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
353*2b54f0dbSXin Li cpuinfo_isa.fcma = true;
354*2b54f0dbSXin Li }
355*2b54f0dbSXin Li }
356*2b54f0dbSXin Li
357*2b54f0dbSXin Li const uint32_t has_feat_jscvt = get_sys_info_by_name("hw.optional.arm.FEAT_JSCVT");
358*2b54f0dbSXin Li if (has_feat_jscvt != 0) {
359*2b54f0dbSXin Li cpuinfo_isa.jscvt = true;
360*2b54f0dbSXin Li } else {
361*2b54f0dbSXin Li // Mandatory in ARMv8.3-A, list only cores released before iOS 15 / macOS 12
362*2b54f0dbSXin Li switch (cpu_family) {
363*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
364*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
365*2b54f0dbSXin Li cpuinfo_isa.jscvt = true;
366*2b54f0dbSXin Li }
367*2b54f0dbSXin Li }
368*2b54f0dbSXin Li
369*2b54f0dbSXin Li const uint32_t has_feat_dotprod = get_sys_info_by_name("hw.optional.arm.FEAT_DotProd");
370*2b54f0dbSXin Li if (has_feat_dotprod != 0) {
371*2b54f0dbSXin Li cpuinfo_isa.dot = true;
372*2b54f0dbSXin Li } else {
373*2b54f0dbSXin Li // Mandatory in ARMv8.4-A, list only cores released before iOS 15 / macOS 12
374*2b54f0dbSXin Li switch (cpu_family) {
375*2b54f0dbSXin Li case CPUFAMILY_ARM_LIGHTNING_THUNDER:
376*2b54f0dbSXin Li case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
377*2b54f0dbSXin Li cpuinfo_isa.dot = true;
378*2b54f0dbSXin Li }
379*2b54f0dbSXin Li }
380*2b54f0dbSXin Li
381*2b54f0dbSXin Li const uint32_t has_feat_i8mm = get_sys_info_by_name("hw.optional.arm.FEAT_I8MM");
382*2b54f0dbSXin Li if (has_feat_i8mm != 0) {
383*2b54f0dbSXin Li cpuinfo_isa.i8mm = true;
384*2b54f0dbSXin Li }
385*2b54f0dbSXin Li
386*2b54f0dbSXin Li uint32_t num_clusters = 1;
387*2b54f0dbSXin Li for (uint32_t i = 0; i < mach_topology.cores; i++) {
388*2b54f0dbSXin Li cores[i] = (struct cpuinfo_core) {
389*2b54f0dbSXin Li .processor_start = i * threads_per_core,
390*2b54f0dbSXin Li .processor_count = threads_per_core,
391*2b54f0dbSXin Li .core_id = i % cores_per_package,
392*2b54f0dbSXin Li .package = packages + i / cores_per_package,
393*2b54f0dbSXin Li .vendor = cpuinfo_vendor_apple,
394*2b54f0dbSXin Li .uarch = decode_uarch(cpu_family, i, mach_topology.cores),
395*2b54f0dbSXin Li };
396*2b54f0dbSXin Li if (i != 0 && cores[i].uarch != cores[i - 1].uarch) {
397*2b54f0dbSXin Li num_clusters++;
398*2b54f0dbSXin Li }
399*2b54f0dbSXin Li }
400*2b54f0dbSXin Li for (uint32_t i = 0; i < mach_topology.threads; i++) {
401*2b54f0dbSXin Li const uint32_t smt_id = i % threads_per_core;
402*2b54f0dbSXin Li const uint32_t core_id = i / threads_per_core;
403*2b54f0dbSXin Li const uint32_t package_id = i / threads_per_package;
404*2b54f0dbSXin Li
405*2b54f0dbSXin Li processors[i].smt_id = smt_id;
406*2b54f0dbSXin Li processors[i].core = &cores[core_id];
407*2b54f0dbSXin Li processors[i].package = &packages[package_id];
408*2b54f0dbSXin Li }
409*2b54f0dbSXin Li
410*2b54f0dbSXin Li clusters = calloc(num_clusters, sizeof(struct cpuinfo_cluster));
411*2b54f0dbSXin Li if (clusters == NULL) {
412*2b54f0dbSXin Li cpuinfo_log_error(
413*2b54f0dbSXin Li "failed to allocate %zu bytes for descriptions of %"PRIu32" clusters",
414*2b54f0dbSXin Li num_clusters * sizeof(struct cpuinfo_cluster), num_clusters);
415*2b54f0dbSXin Li goto cleanup;
416*2b54f0dbSXin Li }
417*2b54f0dbSXin Li uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info));
418*2b54f0dbSXin Li if (uarchs == NULL) {
419*2b54f0dbSXin Li cpuinfo_log_error(
420*2b54f0dbSXin Li "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
421*2b54f0dbSXin Li num_clusters * sizeof(enum cpuinfo_uarch), num_clusters);
422*2b54f0dbSXin Li goto cleanup;
423*2b54f0dbSXin Li }
424*2b54f0dbSXin Li uint32_t cluster_idx = UINT32_MAX;
425*2b54f0dbSXin Li for (uint32_t i = 0; i < mach_topology.cores; i++) {
426*2b54f0dbSXin Li if (i == 0 || cores[i].uarch != cores[i - 1].uarch) {
427*2b54f0dbSXin Li cluster_idx++;
428*2b54f0dbSXin Li uarchs[cluster_idx] = (struct cpuinfo_uarch_info) {
429*2b54f0dbSXin Li .uarch = cores[i].uarch,
430*2b54f0dbSXin Li .processor_count = 1,
431*2b54f0dbSXin Li .core_count = 1,
432*2b54f0dbSXin Li };
433*2b54f0dbSXin Li clusters[cluster_idx] = (struct cpuinfo_cluster) {
434*2b54f0dbSXin Li .processor_start = i * threads_per_core,
435*2b54f0dbSXin Li .processor_count = 1,
436*2b54f0dbSXin Li .core_start = i,
437*2b54f0dbSXin Li .core_count = 1,
438*2b54f0dbSXin Li .cluster_id = cluster_idx,
439*2b54f0dbSXin Li .package = cores[i].package,
440*2b54f0dbSXin Li .vendor = cores[i].vendor,
441*2b54f0dbSXin Li .uarch = cores[i].uarch,
442*2b54f0dbSXin Li };
443*2b54f0dbSXin Li } else {
444*2b54f0dbSXin Li uarchs[cluster_idx].processor_count++;
445*2b54f0dbSXin Li uarchs[cluster_idx].core_count++;
446*2b54f0dbSXin Li clusters[cluster_idx].processor_count++;
447*2b54f0dbSXin Li clusters[cluster_idx].core_count++;
448*2b54f0dbSXin Li }
449*2b54f0dbSXin Li cores[i].cluster = &clusters[cluster_idx];
450*2b54f0dbSXin Li }
451*2b54f0dbSXin Li
452*2b54f0dbSXin Li for (uint32_t i = 0; i < mach_topology.threads; i++) {
453*2b54f0dbSXin Li const uint32_t core_id = i / threads_per_core;
454*2b54f0dbSXin Li processors[i].cluster = cores[core_id].cluster;
455*2b54f0dbSXin Li }
456*2b54f0dbSXin Li
457*2b54f0dbSXin Li for (uint32_t i = 0; i < mach_topology.packages; i++) {
458*2b54f0dbSXin Li packages[i].cluster_start = 0;
459*2b54f0dbSXin Li packages[i].cluster_count = num_clusters;
460*2b54f0dbSXin Li }
461*2b54f0dbSXin Li
462*2b54f0dbSXin Li const uint32_t cacheline_size = get_sys_info(HW_CACHELINE, "HW_CACHELINE");
463*2b54f0dbSXin Li const uint32_t l1d_cache_size = get_sys_info(HW_L1DCACHESIZE, "HW_L1DCACHESIZE");
464*2b54f0dbSXin Li const uint32_t l1i_cache_size = get_sys_info(HW_L1ICACHESIZE, "HW_L1ICACHESIZE");
465*2b54f0dbSXin Li const uint32_t l2_cache_size = get_sys_info(HW_L2CACHESIZE, "HW_L2CACHESIZE");
466*2b54f0dbSXin Li const uint32_t l3_cache_size = get_sys_info(HW_L3CACHESIZE, "HW_L3CACHESIZE");
467*2b54f0dbSXin Li const uint32_t l1_cache_associativity = 4;
468*2b54f0dbSXin Li const uint32_t l2_cache_associativity = 8;
469*2b54f0dbSXin Li const uint32_t l3_cache_associativity = 16;
470*2b54f0dbSXin Li const uint32_t cache_partitions = 1;
471*2b54f0dbSXin Li const uint32_t cache_flags = 0;
472*2b54f0dbSXin Li
473*2b54f0dbSXin Li uint32_t threads_per_l1 = 0, l1_count = 0;
474*2b54f0dbSXin Li if (l1i_cache_size != 0 || l1d_cache_size != 0) {
475*2b54f0dbSXin Li /* Assume L1 caches are private to each core */
476*2b54f0dbSXin Li threads_per_l1 = 1;
477*2b54f0dbSXin Li l1_count = mach_topology.threads / threads_per_l1;
478*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count);
479*2b54f0dbSXin Li }
480*2b54f0dbSXin Li
481*2b54f0dbSXin Li uint32_t threads_per_l2 = 0, l2_count = 0;
482*2b54f0dbSXin Li if (l2_cache_size != 0) {
483*2b54f0dbSXin Li /* Assume L2 cache is shared between all cores */
484*2b54f0dbSXin Li threads_per_l2 = mach_topology.cores;
485*2b54f0dbSXin Li l2_count = 1;
486*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
487*2b54f0dbSXin Li }
488*2b54f0dbSXin Li
489*2b54f0dbSXin Li uint32_t threads_per_l3 = 0, l3_count = 0;
490*2b54f0dbSXin Li if (l3_cache_size != 0) {
491*2b54f0dbSXin Li /* Assume L3 cache is shared between all cores */
492*2b54f0dbSXin Li threads_per_l3 = mach_topology.cores;
493*2b54f0dbSXin Li l3_count = 1;
494*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count);
495*2b54f0dbSXin Li }
496*2b54f0dbSXin Li
497*2b54f0dbSXin Li if (l1i_cache_size != 0) {
498*2b54f0dbSXin Li l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
499*2b54f0dbSXin Li if (l1i == NULL) {
500*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
501*2b54f0dbSXin Li l1_count * sizeof(struct cpuinfo_cache), l1_count);
502*2b54f0dbSXin Li goto cleanup;
503*2b54f0dbSXin Li }
504*2b54f0dbSXin Li for (uint32_t c = 0; c < l1_count; c++) {
505*2b54f0dbSXin Li l1i[c] = (struct cpuinfo_cache) {
506*2b54f0dbSXin Li .size = l1i_cache_size,
507*2b54f0dbSXin Li .associativity = l1_cache_associativity,
508*2b54f0dbSXin Li .sets = l1i_cache_size / (l1_cache_associativity * cacheline_size),
509*2b54f0dbSXin Li .partitions = cache_partitions,
510*2b54f0dbSXin Li .line_size = cacheline_size,
511*2b54f0dbSXin Li .flags = cache_flags,
512*2b54f0dbSXin Li .processor_start = c * threads_per_l1,
513*2b54f0dbSXin Li .processor_count = threads_per_l1,
514*2b54f0dbSXin Li };
515*2b54f0dbSXin Li }
516*2b54f0dbSXin Li for (uint32_t t = 0; t < mach_topology.threads; t++) {
517*2b54f0dbSXin Li processors[t].cache.l1i = &l1i[t / threads_per_l1];
518*2b54f0dbSXin Li }
519*2b54f0dbSXin Li }
520*2b54f0dbSXin Li
521*2b54f0dbSXin Li if (l1d_cache_size != 0) {
522*2b54f0dbSXin Li l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
523*2b54f0dbSXin Li if (l1d == NULL) {
524*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
525*2b54f0dbSXin Li l1_count * sizeof(struct cpuinfo_cache), l1_count);
526*2b54f0dbSXin Li goto cleanup;
527*2b54f0dbSXin Li }
528*2b54f0dbSXin Li for (uint32_t c = 0; c < l1_count; c++) {
529*2b54f0dbSXin Li l1d[c] = (struct cpuinfo_cache) {
530*2b54f0dbSXin Li .size = l1d_cache_size,
531*2b54f0dbSXin Li .associativity = l1_cache_associativity,
532*2b54f0dbSXin Li .sets = l1d_cache_size / (l1_cache_associativity * cacheline_size),
533*2b54f0dbSXin Li .partitions = cache_partitions,
534*2b54f0dbSXin Li .line_size = cacheline_size,
535*2b54f0dbSXin Li .flags = cache_flags,
536*2b54f0dbSXin Li .processor_start = c * threads_per_l1,
537*2b54f0dbSXin Li .processor_count = threads_per_l1,
538*2b54f0dbSXin Li };
539*2b54f0dbSXin Li }
540*2b54f0dbSXin Li for (uint32_t t = 0; t < mach_topology.threads; t++) {
541*2b54f0dbSXin Li processors[t].cache.l1d = &l1d[t / threads_per_l1];
542*2b54f0dbSXin Li }
543*2b54f0dbSXin Li }
544*2b54f0dbSXin Li
545*2b54f0dbSXin Li if (l2_count != 0) {
546*2b54f0dbSXin Li l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
547*2b54f0dbSXin Li if (l2 == NULL) {
548*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
549*2b54f0dbSXin Li l2_count * sizeof(struct cpuinfo_cache), l2_count);
550*2b54f0dbSXin Li goto cleanup;
551*2b54f0dbSXin Li }
552*2b54f0dbSXin Li for (uint32_t c = 0; c < l2_count; c++) {
553*2b54f0dbSXin Li l2[c] = (struct cpuinfo_cache) {
554*2b54f0dbSXin Li .size = l2_cache_size,
555*2b54f0dbSXin Li .associativity = l2_cache_associativity,
556*2b54f0dbSXin Li .sets = l2_cache_size / (l2_cache_associativity * cacheline_size),
557*2b54f0dbSXin Li .partitions = cache_partitions,
558*2b54f0dbSXin Li .line_size = cacheline_size,
559*2b54f0dbSXin Li .flags = cache_flags,
560*2b54f0dbSXin Li .processor_start = c * threads_per_l2,
561*2b54f0dbSXin Li .processor_count = threads_per_l2,
562*2b54f0dbSXin Li };
563*2b54f0dbSXin Li }
564*2b54f0dbSXin Li for (uint32_t t = 0; t < mach_topology.threads; t++) {
565*2b54f0dbSXin Li processors[t].cache.l2 = &l2[0];
566*2b54f0dbSXin Li }
567*2b54f0dbSXin Li }
568*2b54f0dbSXin Li
569*2b54f0dbSXin Li if (l3_count != 0) {
570*2b54f0dbSXin Li l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
571*2b54f0dbSXin Li if (l3 == NULL) {
572*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
573*2b54f0dbSXin Li l3_count * sizeof(struct cpuinfo_cache), l3_count);
574*2b54f0dbSXin Li goto cleanup;
575*2b54f0dbSXin Li }
576*2b54f0dbSXin Li for (uint32_t c = 0; c < l3_count; c++) {
577*2b54f0dbSXin Li l3[c] = (struct cpuinfo_cache) {
578*2b54f0dbSXin Li .size = l3_cache_size,
579*2b54f0dbSXin Li .associativity = l3_cache_associativity,
580*2b54f0dbSXin Li .sets = l3_cache_size / (l3_cache_associativity * cacheline_size),
581*2b54f0dbSXin Li .partitions = cache_partitions,
582*2b54f0dbSXin Li .line_size = cacheline_size,
583*2b54f0dbSXin Li .flags = cache_flags,
584*2b54f0dbSXin Li .processor_start = c * threads_per_l3,
585*2b54f0dbSXin Li .processor_count = threads_per_l3,
586*2b54f0dbSXin Li };
587*2b54f0dbSXin Li }
588*2b54f0dbSXin Li for (uint32_t t = 0; t < mach_topology.threads; t++) {
589*2b54f0dbSXin Li processors[t].cache.l3 = &l3[0];
590*2b54f0dbSXin Li }
591*2b54f0dbSXin Li }
592*2b54f0dbSXin Li
593*2b54f0dbSXin Li /* Commit changes */
594*2b54f0dbSXin Li cpuinfo_processors = processors;
595*2b54f0dbSXin Li cpuinfo_cores = cores;
596*2b54f0dbSXin Li cpuinfo_clusters = clusters;
597*2b54f0dbSXin Li cpuinfo_packages = packages;
598*2b54f0dbSXin Li cpuinfo_uarchs = uarchs;
599*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
600*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
601*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_2] = l2;
602*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_3] = l3;
603*2b54f0dbSXin Li
604*2b54f0dbSXin Li cpuinfo_processors_count = mach_topology.threads;
605*2b54f0dbSXin Li cpuinfo_cores_count = mach_topology.cores;
606*2b54f0dbSXin Li cpuinfo_clusters_count = num_clusters;
607*2b54f0dbSXin Li cpuinfo_packages_count = mach_topology.packages;
608*2b54f0dbSXin Li cpuinfo_uarchs_count = num_clusters;
609*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
610*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
611*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
612*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
613*2b54f0dbSXin Li cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
614*2b54f0dbSXin Li
615*2b54f0dbSXin Li __sync_synchronize();
616*2b54f0dbSXin Li
617*2b54f0dbSXin Li cpuinfo_is_initialized = true;
618*2b54f0dbSXin Li
619*2b54f0dbSXin Li processors = NULL;
620*2b54f0dbSXin Li cores = NULL;
621*2b54f0dbSXin Li clusters = NULL;
622*2b54f0dbSXin Li packages = NULL;
623*2b54f0dbSXin Li uarchs = NULL;
624*2b54f0dbSXin Li l1i = l1d = l2 = l3 = NULL;
625*2b54f0dbSXin Li
626*2b54f0dbSXin Li cleanup:
627*2b54f0dbSXin Li free(processors);
628*2b54f0dbSXin Li free(cores);
629*2b54f0dbSXin Li free(clusters);
630*2b54f0dbSXin Li free(packages);
631*2b54f0dbSXin Li free(uarchs);
632*2b54f0dbSXin Li free(l1i);
633*2b54f0dbSXin Li free(l1d);
634*2b54f0dbSXin Li free(l2);
635*2b54f0dbSXin Li free(l3);
636*2b54f0dbSXin Li }
637