1*c217d954SCole Faust /*
2*c217d954SCole Faust * Copyright (c) 2021-2023 Arm Limited.
3*c217d954SCole Faust *
4*c217d954SCole Faust * SPDX-License-Identifier: MIT
5*c217d954SCole Faust *
6*c217d954SCole Faust * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust *
13*c217d954SCole Faust * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust * copies or substantial portions of the Software.
15*c217d954SCole Faust *
16*c217d954SCole Faust * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust * SOFTWARE.
23*c217d954SCole Faust */
24*c217d954SCole Faust #include "src/common/cpuinfo/CpuInfo.h"
25*c217d954SCole Faust
26*c217d954SCole Faust #include "arm_compute/core/Error.h"
27*c217d954SCole Faust #include "arm_compute/core/Log.h"
28*c217d954SCole Faust #include "support/StringSupport.h"
29*c217d954SCole Faust #include "support/ToolchainSupport.h"
30*c217d954SCole Faust
31*c217d954SCole Faust #include <sstream>
32*c217d954SCole Faust
33*c217d954SCole Faust #if !defined(BARE_METAL)
34*c217d954SCole Faust #include <algorithm>
35*c217d954SCole Faust #include <cstring>
36*c217d954SCole Faust #include <fstream>
37*c217d954SCole Faust #if !defined(_WIN64)
38*c217d954SCole Faust #include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */
39*c217d954SCole Faust #include <sched.h>
40*c217d954SCole Faust #endif /* !defined(_WIN64) */
41*c217d954SCole Faust
42*c217d954SCole Faust #include <thread>
43*c217d954SCole Faust #include <unordered_map>
44*c217d954SCole Faust #endif /* !defined(BARE_METAL) */
45*c217d954SCole Faust
46*c217d954SCole Faust #if !defined(_WIN64)
47*c217d954SCole Faust #if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
48*c217d954SCole Faust #include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */
49*c217d954SCole Faust #include <sys/auxv.h>
50*c217d954SCole Faust #elif defined(__APPLE__) && defined(__aarch64__)
51*c217d954SCole Faust #include <sys/sysctl.h>
52*c217d954SCole Faust #include <sys/types.h>
53*c217d954SCole Faust #endif /* defined(__APPLE__) && defined(__aarch64__)) */
54*c217d954SCole Faust #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
55*c217d954SCole Faust
/* Bit 11 of the AT_HWCAP word: CpuInfo::build() tests it before reading MIDR values through the per-CPU sysfs files */
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11)
/* Read the system register named by freg into var using a single MRS instruction */
#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg : "=r"(var))
59*c217d954SCole Faust namespace arm_compute
60*c217d954SCole Faust {
61*c217d954SCole Faust namespace cpuinfo
62*c217d954SCole Faust {
63*c217d954SCole Faust namespace
64*c217d954SCole Faust {
65*c217d954SCole Faust #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
66*c217d954SCole Faust /** Extract MIDR using CPUID information that are exposed to user-space
67*c217d954SCole Faust *
68*c217d954SCole Faust * @param[in] max_num_cpus Maximum number of possible CPUs
69*c217d954SCole Faust *
70*c217d954SCole Faust * @return std::vector<uint32_t> A list of the MIDR of each core
71*c217d954SCole Faust */
midr_from_cpuid(uint32_t max_num_cpus)72*c217d954SCole Faust std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus)
73*c217d954SCole Faust {
74*c217d954SCole Faust std::vector<uint32_t> cpus;
75*c217d954SCole Faust for(unsigned int i = 0; i < max_num_cpus; ++i)
76*c217d954SCole Faust {
77*c217d954SCole Faust std::stringstream str;
78*c217d954SCole Faust str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1";
79*c217d954SCole Faust std::ifstream file(str.str(), std::ios::in);
80*c217d954SCole Faust if(file.is_open())
81*c217d954SCole Faust {
82*c217d954SCole Faust std::string line;
83*c217d954SCole Faust if(bool(getline(file, line)))
84*c217d954SCole Faust {
85*c217d954SCole Faust cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16));
86*c217d954SCole Faust }
87*c217d954SCole Faust }
88*c217d954SCole Faust }
89*c217d954SCole Faust return cpus;
90*c217d954SCole Faust }
91*c217d954SCole Faust
92*c217d954SCole Faust /** Extract MIDR by parsing the /proc/cpuinfo meta-data
93*c217d954SCole Faust *
94*c217d954SCole Faust * @param[in] max_num_cpus Maximum number of possible CPUs
95*c217d954SCole Faust *
96*c217d954SCole Faust * @return std::vector<uint32_t> A list of the MIDR of each core
97*c217d954SCole Faust */
midr_from_proc_cpuinfo(int max_num_cpus)98*c217d954SCole Faust std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus)
99*c217d954SCole Faust {
100*c217d954SCole Faust std::vector<uint32_t> cpus;
101*c217d954SCole Faust
102*c217d954SCole Faust regex_t proc_regex;
103*c217d954SCole Faust regex_t imp_regex;
104*c217d954SCole Faust regex_t var_regex;
105*c217d954SCole Faust regex_t part_regex;
106*c217d954SCole Faust regex_t rev_regex;
107*c217d954SCole Faust
108*c217d954SCole Faust memset(&proc_regex, 0, sizeof(regex_t));
109*c217d954SCole Faust memset(&imp_regex, 0, sizeof(regex_t));
110*c217d954SCole Faust memset(&var_regex, 0, sizeof(regex_t));
111*c217d954SCole Faust memset(&part_regex, 0, sizeof(regex_t));
112*c217d954SCole Faust memset(&rev_regex, 0, sizeof(regex_t));
113*c217d954SCole Faust
114*c217d954SCole Faust int ret_status = 0;
115*c217d954SCole Faust // If "long-form" cpuinfo is present, parse that to populate models.
116*c217d954SCole Faust ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED);
117*c217d954SCole Faust ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED);
118*c217d954SCole Faust ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED);
119*c217d954SCole Faust ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED);
120*c217d954SCole Faust ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED);
121*c217d954SCole Faust ARM_COMPUTE_UNUSED(ret_status);
122*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
123*c217d954SCole Faust
124*c217d954SCole Faust std::ifstream file("/proc/cpuinfo", std::ios::in);
125*c217d954SCole Faust if(file.is_open())
126*c217d954SCole Faust {
127*c217d954SCole Faust std::string line;
128*c217d954SCole Faust int midr = 0;
129*c217d954SCole Faust int curcpu = -1;
130*c217d954SCole Faust
131*c217d954SCole Faust while(bool(getline(file, line)))
132*c217d954SCole Faust {
133*c217d954SCole Faust std::array<regmatch_t, 2> match;
134*c217d954SCole Faust ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0);
135*c217d954SCole Faust if(ret_status == 0)
136*c217d954SCole Faust {
137*c217d954SCole Faust std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
138*c217d954SCole Faust int newcpu = support::cpp11::stoi(id, nullptr);
139*c217d954SCole Faust
140*c217d954SCole Faust if(curcpu >= 0 && midr == 0)
141*c217d954SCole Faust {
142*c217d954SCole Faust // Matched a new CPU ID without any description of the previous one - looks like old format.
143*c217d954SCole Faust return {};
144*c217d954SCole Faust }
145*c217d954SCole Faust
146*c217d954SCole Faust if(curcpu >= 0 && curcpu < max_num_cpus)
147*c217d954SCole Faust {
148*c217d954SCole Faust cpus.emplace_back(midr);
149*c217d954SCole Faust }
150*c217d954SCole Faust else
151*c217d954SCole Faust {
152*c217d954SCole Faust ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
153*c217d954SCole Faust }
154*c217d954SCole Faust
155*c217d954SCole Faust midr = 0;
156*c217d954SCole Faust curcpu = newcpu;
157*c217d954SCole Faust
158*c217d954SCole Faust continue;
159*c217d954SCole Faust }
160*c217d954SCole Faust
161*c217d954SCole Faust ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0);
162*c217d954SCole Faust if(ret_status == 0)
163*c217d954SCole Faust {
164*c217d954SCole Faust std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
165*c217d954SCole Faust int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
166*c217d954SCole Faust midr |= (impv << 24);
167*c217d954SCole Faust
168*c217d954SCole Faust continue;
169*c217d954SCole Faust }
170*c217d954SCole Faust
171*c217d954SCole Faust ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0);
172*c217d954SCole Faust if(ret_status == 0)
173*c217d954SCole Faust {
174*c217d954SCole Faust std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
175*c217d954SCole Faust int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
176*c217d954SCole Faust midr |= (varv << 20);
177*c217d954SCole Faust
178*c217d954SCole Faust continue;
179*c217d954SCole Faust }
180*c217d954SCole Faust
181*c217d954SCole Faust ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0);
182*c217d954SCole Faust if(ret_status == 0)
183*c217d954SCole Faust {
184*c217d954SCole Faust std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
185*c217d954SCole Faust int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
186*c217d954SCole Faust midr |= (partv << 4);
187*c217d954SCole Faust
188*c217d954SCole Faust continue;
189*c217d954SCole Faust }
190*c217d954SCole Faust
191*c217d954SCole Faust ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0);
192*c217d954SCole Faust if(ret_status == 0)
193*c217d954SCole Faust {
194*c217d954SCole Faust std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
195*c217d954SCole Faust int regv = support::cpp11::stoi(subexp, nullptr);
196*c217d954SCole Faust midr |= (regv);
197*c217d954SCole Faust midr |= (0xf << 16);
198*c217d954SCole Faust
199*c217d954SCole Faust continue;
200*c217d954SCole Faust }
201*c217d954SCole Faust }
202*c217d954SCole Faust
203*c217d954SCole Faust if(curcpu >= 0 && curcpu < max_num_cpus)
204*c217d954SCole Faust {
205*c217d954SCole Faust cpus.emplace_back(midr);
206*c217d954SCole Faust }
207*c217d954SCole Faust else
208*c217d954SCole Faust {
209*c217d954SCole Faust ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
210*c217d954SCole Faust }
211*c217d954SCole Faust }
212*c217d954SCole Faust
213*c217d954SCole Faust // Free allocated memory
214*c217d954SCole Faust regfree(&proc_regex);
215*c217d954SCole Faust regfree(&imp_regex);
216*c217d954SCole Faust regfree(&var_regex);
217*c217d954SCole Faust regfree(&part_regex);
218*c217d954SCole Faust regfree(&rev_regex);
219*c217d954SCole Faust
220*c217d954SCole Faust return cpus;
221*c217d954SCole Faust }
222*c217d954SCole Faust
223*c217d954SCole Faust /** Get the maximim number of CPUs in the system by parsing /sys/devices/system/cpu/present
224*c217d954SCole Faust *
225*c217d954SCole Faust * @return int Maximum number of CPUs
226*c217d954SCole Faust */
get_max_cpus()227*c217d954SCole Faust int get_max_cpus()
228*c217d954SCole Faust {
229*c217d954SCole Faust int max_cpus = 1;
230*c217d954SCole Faust std::ifstream CPUspresent;
231*c217d954SCole Faust CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
232*c217d954SCole Faust bool success = false;
233*c217d954SCole Faust
234*c217d954SCole Faust if(CPUspresent.is_open())
235*c217d954SCole Faust {
236*c217d954SCole Faust std::string line;
237*c217d954SCole Faust
238*c217d954SCole Faust if(bool(getline(CPUspresent, line)))
239*c217d954SCole Faust {
240*c217d954SCole Faust /* The content of this file is a list of ranges or single values, e.g.
241*c217d954SCole Faust * 0-5, or 1-3,5,7 or similar. As we are interested in the
242*c217d954SCole Faust * max valid ID, we just need to find the last valid
243*c217d954SCole Faust * delimiter ('-' or ',') and parse the integer immediately after that.
244*c217d954SCole Faust */
245*c217d954SCole Faust auto startfrom = line.begin();
246*c217d954SCole Faust
247*c217d954SCole Faust for(auto i = line.begin(); i < line.end(); ++i)
248*c217d954SCole Faust {
249*c217d954SCole Faust if(*i == '-' || *i == ',')
250*c217d954SCole Faust {
251*c217d954SCole Faust startfrom = i + 1;
252*c217d954SCole Faust }
253*c217d954SCole Faust }
254*c217d954SCole Faust
255*c217d954SCole Faust line.erase(line.begin(), startfrom);
256*c217d954SCole Faust
257*c217d954SCole Faust max_cpus = support::cpp11::stoi(line, nullptr) + 1;
258*c217d954SCole Faust success = true;
259*c217d954SCole Faust }
260*c217d954SCole Faust }
261*c217d954SCole Faust
262*c217d954SCole Faust // Return std::thread::hardware_concurrency() as a fallback.
263*c217d954SCole Faust if(!success)
264*c217d954SCole Faust {
265*c217d954SCole Faust max_cpus = std::thread::hardware_concurrency();
266*c217d954SCole Faust }
267*c217d954SCole Faust return max_cpus;
268*c217d954SCole Faust }
269*c217d954SCole Faust #elif defined(__aarch64__) && defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
270*c217d954SCole Faust /** Query features through sysctlbyname
271*c217d954SCole Faust *
272*c217d954SCole Faust * @return int value queried
273*c217d954SCole Faust */
274*c217d954SCole Faust int get_hw_capability(const std::string &cap)
275*c217d954SCole Faust {
276*c217d954SCole Faust int64_t result(0);
277*c217d954SCole Faust size_t size = sizeof(result);
278*c217d954SCole Faust sysctlbyname(cap.c_str(), &result, &size, NULL, 0);
279*c217d954SCole Faust return result;
280*c217d954SCole Faust }
281*c217d954SCole Faust #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
282*c217d954SCole Faust
283*c217d954SCole Faust #if defined(BARE_METAL) && defined(__aarch64__)
/** Read the SVE feature register ID_AA64ZFR0_EL1
 *
 * The MRS is emitted as a raw instruction word (.inst) that targets x3, and x3 is then copied
 * into the output operand; x3 is therefore declared as clobbered.
 * NOTE(review): presumably the raw encoding is used so that assemblers which do not know the
 * register name can still build this file - confirm.
 *
 * @return uint64_t Raw value read from the register
 */
uint64_t get_sve_feature_reg()
{
    uint64_t zfr0_value = 0;
    __asm __volatile(
        ".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n"
        "MOV %0, X3"
        : "=r"(zfr0_value)
        :
        : "x3");
    return zfr0_value;
}
295*c217d954SCole Faust #endif /* defined(BARE_METAL) && defined(__aarch64__) */
296*c217d954SCole Faust } // namespace
297*c217d954SCole Faust
CpuInfo(CpuIsaInfo isa,std::vector<CpuModel> cpus)298*c217d954SCole Faust CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus)
299*c217d954SCole Faust : _isa(std::move(isa)), _cpus(std::move(cpus))
300*c217d954SCole Faust {
301*c217d954SCole Faust }
302*c217d954SCole Faust
build()303*c217d954SCole Faust CpuInfo CpuInfo::build()
304*c217d954SCole Faust {
305*c217d954SCole Faust #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
306*c217d954SCole Faust const uint32_t hwcaps = getauxval(AT_HWCAP);
307*c217d954SCole Faust const uint32_t hwcaps2 = getauxval(AT_HWCAP2);
308*c217d954SCole Faust const uint32_t max_cpus = get_max_cpus();
309*c217d954SCole Faust
310*c217d954SCole Faust // Populate midr values
311*c217d954SCole Faust std::vector<uint32_t> cpus_midr;
312*c217d954SCole Faust if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID)
313*c217d954SCole Faust {
314*c217d954SCole Faust cpus_midr = midr_from_cpuid(max_cpus);
315*c217d954SCole Faust }
316*c217d954SCole Faust if(cpus_midr.empty())
317*c217d954SCole Faust {
318*c217d954SCole Faust cpus_midr = midr_from_proc_cpuinfo(max_cpus);
319*c217d954SCole Faust }
320*c217d954SCole Faust if(cpus_midr.empty())
321*c217d954SCole Faust {
322*c217d954SCole Faust cpus_midr.resize(max_cpus, 0);
323*c217d954SCole Faust }
324*c217d954SCole Faust
325*c217d954SCole Faust // Populate isa (Assume homogeneous ISA specification)
326*c217d954SCole Faust CpuIsaInfo isa = init_cpu_isa_from_hwcaps(hwcaps, hwcaps2, cpus_midr.back());
327*c217d954SCole Faust
328*c217d954SCole Faust // Convert midr to models
329*c217d954SCole Faust std::vector<CpuModel> cpus_model;
330*c217d954SCole Faust std::transform(std::begin(cpus_midr), std::end(cpus_midr), std::back_inserter(cpus_model),
331*c217d954SCole Faust [](uint32_t midr) -> CpuModel { return midr_to_model(midr); });
332*c217d954SCole Faust
333*c217d954SCole Faust CpuInfo info(isa, cpus_model);
334*c217d954SCole Faust return info;
335*c217d954SCole Faust
336*c217d954SCole Faust #elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
337*c217d954SCole Faust
338*c217d954SCole Faust // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly.
339*c217d954SCole Faust uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0;
340*c217d954SCole Faust ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1);
341*c217d954SCole Faust ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1);
342*c217d954SCole Faust ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1);
343*c217d954SCole Faust ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1);
344*c217d954SCole Faust ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1);
345*c217d954SCole Faust if((pfr0 >> 32) & 0xf)
346*c217d954SCole Faust {
347*c217d954SCole Faust svefr0 = get_sve_feature_reg();
348*c217d954SCole Faust }
349*c217d954SCole Faust
350*c217d954SCole Faust CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr);
351*c217d954SCole Faust std::vector<CpuModel> cpus_model(1, midr_to_model(midr));
352*c217d954SCole Faust CpuInfo info(isa, cpus_model);
353*c217d954SCole Faust return info;
354*c217d954SCole Faust #elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */
355*c217d954SCole Faust int ncpus = get_hw_capability("hw.perflevel0.logicalcpu");
356*c217d954SCole Faust CpuIsaInfo isainfo;
357*c217d954SCole Faust std::vector<CpuModel> cpus_model(ncpus);
358*c217d954SCole Faust isainfo.neon = get_hw_capability("hw.optional.neon");
359*c217d954SCole Faust isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16");
360*c217d954SCole Faust isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd");
361*c217d954SCole Faust CpuInfo info(isainfo, cpus_model);
362*c217d954SCole Faust return info;
363*c217d954SCole Faust #else /* #elif defined(__aarch64__) && defined(__APPLE__) */
364*c217d954SCole Faust CpuInfo info(CpuIsaInfo(), { CpuModel::GENERIC });
365*c217d954SCole Faust return info;
366*c217d954SCole Faust #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
367*c217d954SCole Faust }
368*c217d954SCole Faust
cpu_model(uint32_t cpuid) const369*c217d954SCole Faust CpuModel CpuInfo::cpu_model(uint32_t cpuid) const
370*c217d954SCole Faust {
371*c217d954SCole Faust if(cpuid < _cpus.size())
372*c217d954SCole Faust {
373*c217d954SCole Faust return _cpus[cpuid];
374*c217d954SCole Faust }
375*c217d954SCole Faust return CpuModel::GENERIC;
376*c217d954SCole Faust }
377*c217d954SCole Faust
cpu_model() const378*c217d954SCole Faust CpuModel CpuInfo::cpu_model() const
379*c217d954SCole Faust {
380*c217d954SCole Faust #if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__))
381*c217d954SCole Faust return cpu_model(0);
382*c217d954SCole Faust #else /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
383*c217d954SCole Faust return cpu_model(sched_getcpu());
384*c217d954SCole Faust #endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
385*c217d954SCole Faust }
386*c217d954SCole Faust
num_cpus() const387*c217d954SCole Faust uint32_t CpuInfo::num_cpus() const
388*c217d954SCole Faust {
389*c217d954SCole Faust return _cpus.size();
390*c217d954SCole Faust }
391*c217d954SCole Faust
num_threads_hint()392*c217d954SCole Faust uint32_t num_threads_hint()
393*c217d954SCole Faust {
394*c217d954SCole Faust unsigned int num_threads_hint = 1;
395*c217d954SCole Faust
396*c217d954SCole Faust #if !defined(BARE_METAL) && !defined(_WIN64)
397*c217d954SCole Faust std::vector<std::string> cpus;
398*c217d954SCole Faust cpus.reserve(64);
399*c217d954SCole Faust
400*c217d954SCole Faust // CPU part regex
401*c217d954SCole Faust regex_t cpu_part_rgx;
402*c217d954SCole Faust memset(&cpu_part_rgx, 0, sizeof(regex_t));
403*c217d954SCole Faust int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED);
404*c217d954SCole Faust ARM_COMPUTE_UNUSED(ret_status);
405*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
406*c217d954SCole Faust
407*c217d954SCole Faust // Read cpuinfo and get occurrence of each core
408*c217d954SCole Faust std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in);
409*c217d954SCole Faust if(cpuinfo_file.is_open())
410*c217d954SCole Faust {
411*c217d954SCole Faust std::string line;
412*c217d954SCole Faust while(bool(getline(cpuinfo_file, line)))
413*c217d954SCole Faust {
414*c217d954SCole Faust std::array<regmatch_t, 2> match;
415*c217d954SCole Faust if(regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0)
416*c217d954SCole Faust {
417*c217d954SCole Faust cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)));
418*c217d954SCole Faust }
419*c217d954SCole Faust }
420*c217d954SCole Faust }
421*c217d954SCole Faust regfree(&cpu_part_rgx);
422*c217d954SCole Faust
423*c217d954SCole Faust // Get min number of threads
424*c217d954SCole Faust std::sort(std::begin(cpus), std::end(cpus));
425*c217d954SCole Faust auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t
426*c217d954SCole Faust {
427*c217d954SCole Faust std::unordered_map<std::string, uint32_t> cpus_freq;
428*c217d954SCole Faust for(const auto &cpu : cpus)
429*c217d954SCole Faust {
430*c217d954SCole Faust cpus_freq[cpu]++;
431*c217d954SCole Faust }
432*c217d954SCole Faust
433*c217d954SCole Faust uint32_t vmin = cpus.size() + 1;
434*c217d954SCole Faust for(const auto &cpu_freq : cpus_freq)
435*c217d954SCole Faust {
436*c217d954SCole Faust vmin = std::min(vmin, cpu_freq.second);
437*c217d954SCole Faust }
438*c217d954SCole Faust return vmin;
439*c217d954SCole Faust };
440*c217d954SCole Faust
441*c217d954SCole Faust // Set thread hint
442*c217d954SCole Faust num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus);
443*c217d954SCole Faust #endif /* !defined(BARE_METAL) */
444*c217d954SCole Faust
445*c217d954SCole Faust return num_threads_hint;
446*c217d954SCole Faust }
447*c217d954SCole Faust } // namespace cpuinfo
448*c217d954SCole Faust } // namespace arm_compute
449