xref: /aosp_15_r20/external/ComputeLibrary/src/common/cpuinfo/CpuInfo.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1*c217d954SCole Faust /*
2*c217d954SCole Faust  * Copyright (c) 2021-2023 Arm Limited.
3*c217d954SCole Faust  *
4*c217d954SCole Faust  * SPDX-License-Identifier: MIT
5*c217d954SCole Faust  *
6*c217d954SCole Faust  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust  * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust  * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust  * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust  * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust  *
13*c217d954SCole Faust  * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust  * copies or substantial portions of the Software.
15*c217d954SCole Faust  *
16*c217d954SCole Faust  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust  * SOFTWARE.
23*c217d954SCole Faust  */
24*c217d954SCole Faust #include "src/common/cpuinfo/CpuInfo.h"
25*c217d954SCole Faust 
26*c217d954SCole Faust #include "arm_compute/core/Error.h"
27*c217d954SCole Faust #include "arm_compute/core/Log.h"
28*c217d954SCole Faust #include "support/StringSupport.h"
29*c217d954SCole Faust #include "support/ToolchainSupport.h"
30*c217d954SCole Faust 
31*c217d954SCole Faust #include <sstream>
32*c217d954SCole Faust 
33*c217d954SCole Faust #if !defined(BARE_METAL)
34*c217d954SCole Faust #include <algorithm>
35*c217d954SCole Faust #include <cstring>
36*c217d954SCole Faust #include <fstream>
37*c217d954SCole Faust #if !defined(_WIN64)
38*c217d954SCole Faust #include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */
39*c217d954SCole Faust #include <sched.h>
40*c217d954SCole Faust #endif /* !defined(_WIN64) */
41*c217d954SCole Faust 
42*c217d954SCole Faust #include <thread>
43*c217d954SCole Faust #include <unordered_map>
44*c217d954SCole Faust #endif /* !defined(BARE_METAL) */
45*c217d954SCole Faust 
46*c217d954SCole Faust #if !defined(_WIN64)
47*c217d954SCole Faust #if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
48*c217d954SCole Faust #include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */
49*c217d954SCole Faust #include <sys/auxv.h>
50*c217d954SCole Faust #elif defined(__APPLE__) && defined(__aarch64__)
51*c217d954SCole Faust #include <sys/sysctl.h>
52*c217d954SCole Faust #include <sys/types.h>
53*c217d954SCole Faust #endif /* defined(__APPLE__) && defined(__aarch64__)) */
54*c217d954SCole Faust #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
55*c217d954SCole Faust 
56*c217d954SCole Faust #define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11)
57*c217d954SCole Faust #define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg \
58*c217d954SCole Faust                                                                 : "=r"(var))
59*c217d954SCole Faust namespace arm_compute
60*c217d954SCole Faust {
61*c217d954SCole Faust namespace cpuinfo
62*c217d954SCole Faust {
63*c217d954SCole Faust namespace
64*c217d954SCole Faust {
65*c217d954SCole Faust #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
66*c217d954SCole Faust /** Extract MIDR using CPUID information that are exposed to user-space
67*c217d954SCole Faust  *
68*c217d954SCole Faust  * @param[in] max_num_cpus Maximum number of possible CPUs
69*c217d954SCole Faust  *
70*c217d954SCole Faust  * @return std::vector<uint32_t> A list of the MIDR of each core
71*c217d954SCole Faust  */
midr_from_cpuid(uint32_t max_num_cpus)72*c217d954SCole Faust std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus)
73*c217d954SCole Faust {
74*c217d954SCole Faust     std::vector<uint32_t> cpus;
75*c217d954SCole Faust     for(unsigned int i = 0; i < max_num_cpus; ++i)
76*c217d954SCole Faust     {
77*c217d954SCole Faust         std::stringstream str;
78*c217d954SCole Faust         str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1";
79*c217d954SCole Faust         std::ifstream file(str.str(), std::ios::in);
80*c217d954SCole Faust         if(file.is_open())
81*c217d954SCole Faust         {
82*c217d954SCole Faust             std::string line;
83*c217d954SCole Faust             if(bool(getline(file, line)))
84*c217d954SCole Faust             {
85*c217d954SCole Faust                 cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16));
86*c217d954SCole Faust             }
87*c217d954SCole Faust         }
88*c217d954SCole Faust     }
89*c217d954SCole Faust     return cpus;
90*c217d954SCole Faust }
91*c217d954SCole Faust 
92*c217d954SCole Faust /** Extract MIDR by parsing the /proc/cpuinfo meta-data
93*c217d954SCole Faust  *
94*c217d954SCole Faust  * @param[in] max_num_cpus Maximum number of possible CPUs
95*c217d954SCole Faust  *
96*c217d954SCole Faust  * @return std::vector<uint32_t> A list of the MIDR of each core
97*c217d954SCole Faust  */
midr_from_proc_cpuinfo(int max_num_cpus)98*c217d954SCole Faust std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus)
99*c217d954SCole Faust {
100*c217d954SCole Faust     std::vector<uint32_t> cpus;
101*c217d954SCole Faust 
102*c217d954SCole Faust     regex_t proc_regex;
103*c217d954SCole Faust     regex_t imp_regex;
104*c217d954SCole Faust     regex_t var_regex;
105*c217d954SCole Faust     regex_t part_regex;
106*c217d954SCole Faust     regex_t rev_regex;
107*c217d954SCole Faust 
108*c217d954SCole Faust     memset(&proc_regex, 0, sizeof(regex_t));
109*c217d954SCole Faust     memset(&imp_regex, 0, sizeof(regex_t));
110*c217d954SCole Faust     memset(&var_regex, 0, sizeof(regex_t));
111*c217d954SCole Faust     memset(&part_regex, 0, sizeof(regex_t));
112*c217d954SCole Faust     memset(&rev_regex, 0, sizeof(regex_t));
113*c217d954SCole Faust 
114*c217d954SCole Faust     int ret_status = 0;
115*c217d954SCole Faust     // If "long-form" cpuinfo is present, parse that to populate models.
116*c217d954SCole Faust     ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED);
117*c217d954SCole Faust     ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED);
118*c217d954SCole Faust     ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED);
119*c217d954SCole Faust     ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED);
120*c217d954SCole Faust     ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED);
121*c217d954SCole Faust     ARM_COMPUTE_UNUSED(ret_status);
122*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
123*c217d954SCole Faust 
124*c217d954SCole Faust     std::ifstream file("/proc/cpuinfo", std::ios::in);
125*c217d954SCole Faust     if(file.is_open())
126*c217d954SCole Faust     {
127*c217d954SCole Faust         std::string line;
128*c217d954SCole Faust         int         midr   = 0;
129*c217d954SCole Faust         int         curcpu = -1;
130*c217d954SCole Faust 
131*c217d954SCole Faust         while(bool(getline(file, line)))
132*c217d954SCole Faust         {
133*c217d954SCole Faust             std::array<regmatch_t, 2> match;
134*c217d954SCole Faust             ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0);
135*c217d954SCole Faust             if(ret_status == 0)
136*c217d954SCole Faust             {
137*c217d954SCole Faust                 std::string id     = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
138*c217d954SCole Faust                 int         newcpu = support::cpp11::stoi(id, nullptr);
139*c217d954SCole Faust 
140*c217d954SCole Faust                 if(curcpu >= 0 && midr == 0)
141*c217d954SCole Faust                 {
142*c217d954SCole Faust                     // Matched a new CPU ID without any description of the previous one - looks like old format.
143*c217d954SCole Faust                     return {};
144*c217d954SCole Faust                 }
145*c217d954SCole Faust 
146*c217d954SCole Faust                 if(curcpu >= 0 && curcpu < max_num_cpus)
147*c217d954SCole Faust                 {
148*c217d954SCole Faust                     cpus.emplace_back(midr);
149*c217d954SCole Faust                 }
150*c217d954SCole Faust                 else
151*c217d954SCole Faust                 {
152*c217d954SCole Faust                     ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
153*c217d954SCole Faust                 }
154*c217d954SCole Faust 
155*c217d954SCole Faust                 midr   = 0;
156*c217d954SCole Faust                 curcpu = newcpu;
157*c217d954SCole Faust 
158*c217d954SCole Faust                 continue;
159*c217d954SCole Faust             }
160*c217d954SCole Faust 
161*c217d954SCole Faust             ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0);
162*c217d954SCole Faust             if(ret_status == 0)
163*c217d954SCole Faust             {
164*c217d954SCole Faust                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
165*c217d954SCole Faust                 int         impv   = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
166*c217d954SCole Faust                 midr |= (impv << 24);
167*c217d954SCole Faust 
168*c217d954SCole Faust                 continue;
169*c217d954SCole Faust             }
170*c217d954SCole Faust 
171*c217d954SCole Faust             ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0);
172*c217d954SCole Faust             if(ret_status == 0)
173*c217d954SCole Faust             {
174*c217d954SCole Faust                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
175*c217d954SCole Faust                 int         varv   = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
176*c217d954SCole Faust                 midr |= (varv << 20);
177*c217d954SCole Faust 
178*c217d954SCole Faust                 continue;
179*c217d954SCole Faust             }
180*c217d954SCole Faust 
181*c217d954SCole Faust             ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0);
182*c217d954SCole Faust             if(ret_status == 0)
183*c217d954SCole Faust             {
184*c217d954SCole Faust                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
185*c217d954SCole Faust                 int         partv  = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
186*c217d954SCole Faust                 midr |= (partv << 4);
187*c217d954SCole Faust 
188*c217d954SCole Faust                 continue;
189*c217d954SCole Faust             }
190*c217d954SCole Faust 
191*c217d954SCole Faust             ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0);
192*c217d954SCole Faust             if(ret_status == 0)
193*c217d954SCole Faust             {
194*c217d954SCole Faust                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
195*c217d954SCole Faust                 int         regv   = support::cpp11::stoi(subexp, nullptr);
196*c217d954SCole Faust                 midr |= (regv);
197*c217d954SCole Faust                 midr |= (0xf << 16);
198*c217d954SCole Faust 
199*c217d954SCole Faust                 continue;
200*c217d954SCole Faust             }
201*c217d954SCole Faust         }
202*c217d954SCole Faust 
203*c217d954SCole Faust         if(curcpu >= 0 && curcpu < max_num_cpus)
204*c217d954SCole Faust         {
205*c217d954SCole Faust             cpus.emplace_back(midr);
206*c217d954SCole Faust         }
207*c217d954SCole Faust         else
208*c217d954SCole Faust         {
209*c217d954SCole Faust             ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
210*c217d954SCole Faust         }
211*c217d954SCole Faust     }
212*c217d954SCole Faust 
213*c217d954SCole Faust     // Free allocated memory
214*c217d954SCole Faust     regfree(&proc_regex);
215*c217d954SCole Faust     regfree(&imp_regex);
216*c217d954SCole Faust     regfree(&var_regex);
217*c217d954SCole Faust     regfree(&part_regex);
218*c217d954SCole Faust     regfree(&rev_regex);
219*c217d954SCole Faust 
220*c217d954SCole Faust     return cpus;
221*c217d954SCole Faust }
222*c217d954SCole Faust 
223*c217d954SCole Faust /** Get the maximim number of CPUs in the system by parsing /sys/devices/system/cpu/present
224*c217d954SCole Faust  *
225*c217d954SCole Faust  * @return int Maximum number of CPUs
226*c217d954SCole Faust  */
get_max_cpus()227*c217d954SCole Faust int get_max_cpus()
228*c217d954SCole Faust {
229*c217d954SCole Faust     int           max_cpus = 1;
230*c217d954SCole Faust     std::ifstream CPUspresent;
231*c217d954SCole Faust     CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
232*c217d954SCole Faust     bool success = false;
233*c217d954SCole Faust 
234*c217d954SCole Faust     if(CPUspresent.is_open())
235*c217d954SCole Faust     {
236*c217d954SCole Faust         std::string line;
237*c217d954SCole Faust 
238*c217d954SCole Faust         if(bool(getline(CPUspresent, line)))
239*c217d954SCole Faust         {
240*c217d954SCole Faust             /* The content of this file is a list of ranges or single values, e.g.
241*c217d954SCole Faust                  * 0-5, or 1-3,5,7 or similar.  As we are interested in the
242*c217d954SCole Faust                  * max valid ID, we just need to find the last valid
243*c217d954SCole Faust                  * delimiter ('-' or ',') and parse the integer immediately after that.
244*c217d954SCole Faust                  */
245*c217d954SCole Faust             auto startfrom = line.begin();
246*c217d954SCole Faust 
247*c217d954SCole Faust             for(auto i = line.begin(); i < line.end(); ++i)
248*c217d954SCole Faust             {
249*c217d954SCole Faust                 if(*i == '-' || *i == ',')
250*c217d954SCole Faust                 {
251*c217d954SCole Faust                     startfrom = i + 1;
252*c217d954SCole Faust                 }
253*c217d954SCole Faust             }
254*c217d954SCole Faust 
255*c217d954SCole Faust             line.erase(line.begin(), startfrom);
256*c217d954SCole Faust 
257*c217d954SCole Faust             max_cpus = support::cpp11::stoi(line, nullptr) + 1;
258*c217d954SCole Faust             success  = true;
259*c217d954SCole Faust         }
260*c217d954SCole Faust     }
261*c217d954SCole Faust 
262*c217d954SCole Faust     // Return std::thread::hardware_concurrency() as a fallback.
263*c217d954SCole Faust     if(!success)
264*c217d954SCole Faust     {
265*c217d954SCole Faust         max_cpus = std::thread::hardware_concurrency();
266*c217d954SCole Faust     }
267*c217d954SCole Faust     return max_cpus;
268*c217d954SCole Faust }
269*c217d954SCole Faust #elif defined(__aarch64__) && defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
270*c217d954SCole Faust /** Query features through sysctlbyname
271*c217d954SCole Faust   *
272*c217d954SCole Faust   * @return int value queried
273*c217d954SCole Faust   */
274*c217d954SCole Faust int get_hw_capability(const std::string &cap)
275*c217d954SCole Faust {
276*c217d954SCole Faust     int64_t result(0);
277*c217d954SCole Faust     size_t  size = sizeof(result);
278*c217d954SCole Faust     sysctlbyname(cap.c_str(), &result, &size, NULL, 0);
279*c217d954SCole Faust     return result;
280*c217d954SCole Faust }
281*c217d954SCole Faust #endif                                           /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
282*c217d954SCole Faust 
283*c217d954SCole Faust #if defined(BARE_METAL) && defined(__aarch64__)
/** Read the SVE feature register (ID_AA64ZFR0_EL1)
 *
 * The MRS instruction is emitted as a raw instruction word that targets x3, and the value is
 * then copied into the output operand; x3 is declared as clobbered.
 * NOTE(review): the raw encoding is presumably used so that assemblers without SVE support
 * can still build this file - confirm.
 *
 * @return uint64_t Content of ID_AA64ZFR0_EL1
 */
uint64_t get_sve_feature_reg()
{
    uint64_t zfr0_value = 0;
    __asm __volatile(
        ".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n"
        "MOV  %0, X3"
        : "=r"(zfr0_value)
        :
        : "x3");
    return zfr0_value;
}
295*c217d954SCole Faust #endif /* defined(BARE_METAL) && defined(__aarch64__) */
296*c217d954SCole Faust } // namespace
297*c217d954SCole Faust 
CpuInfo(CpuIsaInfo isa,std::vector<CpuModel> cpus)298*c217d954SCole Faust CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus)
299*c217d954SCole Faust     : _isa(std::move(isa)), _cpus(std::move(cpus))
300*c217d954SCole Faust {
301*c217d954SCole Faust }
302*c217d954SCole Faust 
build()303*c217d954SCole Faust CpuInfo CpuInfo::build()
304*c217d954SCole Faust {
305*c217d954SCole Faust #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
306*c217d954SCole Faust     const uint32_t hwcaps   = getauxval(AT_HWCAP);
307*c217d954SCole Faust     const uint32_t hwcaps2  = getauxval(AT_HWCAP2);
308*c217d954SCole Faust     const uint32_t max_cpus = get_max_cpus();
309*c217d954SCole Faust 
310*c217d954SCole Faust     // Populate midr values
311*c217d954SCole Faust     std::vector<uint32_t> cpus_midr;
312*c217d954SCole Faust     if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID)
313*c217d954SCole Faust     {
314*c217d954SCole Faust         cpus_midr = midr_from_cpuid(max_cpus);
315*c217d954SCole Faust     }
316*c217d954SCole Faust     if(cpus_midr.empty())
317*c217d954SCole Faust     {
318*c217d954SCole Faust         cpus_midr = midr_from_proc_cpuinfo(max_cpus);
319*c217d954SCole Faust     }
320*c217d954SCole Faust     if(cpus_midr.empty())
321*c217d954SCole Faust     {
322*c217d954SCole Faust         cpus_midr.resize(max_cpus, 0);
323*c217d954SCole Faust     }
324*c217d954SCole Faust 
325*c217d954SCole Faust     // Populate isa (Assume homogeneous ISA specification)
326*c217d954SCole Faust     CpuIsaInfo isa = init_cpu_isa_from_hwcaps(hwcaps, hwcaps2, cpus_midr.back());
327*c217d954SCole Faust 
328*c217d954SCole Faust     // Convert midr to models
329*c217d954SCole Faust     std::vector<CpuModel> cpus_model;
330*c217d954SCole Faust     std::transform(std::begin(cpus_midr), std::end(cpus_midr), std::back_inserter(cpus_model),
331*c217d954SCole Faust                    [](uint32_t midr) -> CpuModel { return midr_to_model(midr); });
332*c217d954SCole Faust 
333*c217d954SCole Faust     CpuInfo info(isa, cpus_model);
334*c217d954SCole Faust     return info;
335*c217d954SCole Faust 
336*c217d954SCole Faust #elif(BARE_METAL) && defined(__aarch64__)        /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
337*c217d954SCole Faust 
338*c217d954SCole Faust     // Assume single CPU in bare metal mode.  Just read the ID register and feature bits directly.
339*c217d954SCole Faust     uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0;
340*c217d954SCole Faust     ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1);
341*c217d954SCole Faust     ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1);
342*c217d954SCole Faust     ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1);
343*c217d954SCole Faust     ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1);
344*c217d954SCole Faust     ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1);
345*c217d954SCole Faust     if((pfr0 >> 32) & 0xf)
346*c217d954SCole Faust     {
347*c217d954SCole Faust         svefr0 = get_sve_feature_reg();
348*c217d954SCole Faust     }
349*c217d954SCole Faust 
350*c217d954SCole Faust     CpuIsaInfo            isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr);
351*c217d954SCole Faust     std::vector<CpuModel> cpus_model(1, midr_to_model(midr));
352*c217d954SCole Faust     CpuInfo               info(isa, cpus_model);
353*c217d954SCole Faust     return info;
354*c217d954SCole Faust #elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */
355*c217d954SCole Faust     int                   ncpus = get_hw_capability("hw.perflevel0.logicalcpu");
356*c217d954SCole Faust     CpuIsaInfo            isainfo;
357*c217d954SCole Faust     std::vector<CpuModel> cpus_model(ncpus);
358*c217d954SCole Faust     isainfo.neon = get_hw_capability("hw.optional.neon");
359*c217d954SCole Faust     isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16");
360*c217d954SCole Faust     isainfo.dot  = get_hw_capability("hw.optional.arm.FEAT_DotProd");
361*c217d954SCole Faust     CpuInfo info(isainfo, cpus_model);
362*c217d954SCole Faust     return info;
363*c217d954SCole Faust #else                                            /* #elif defined(__aarch64__) && defined(__APPLE__) */
364*c217d954SCole Faust     CpuInfo info(CpuIsaInfo(), { CpuModel::GENERIC });
365*c217d954SCole Faust     return info;
366*c217d954SCole Faust #endif                                           /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
367*c217d954SCole Faust }
368*c217d954SCole Faust 
cpu_model(uint32_t cpuid) const369*c217d954SCole Faust CpuModel CpuInfo::cpu_model(uint32_t cpuid) const
370*c217d954SCole Faust {
371*c217d954SCole Faust     if(cpuid < _cpus.size())
372*c217d954SCole Faust     {
373*c217d954SCole Faust         return _cpus[cpuid];
374*c217d954SCole Faust     }
375*c217d954SCole Faust     return CpuModel::GENERIC;
376*c217d954SCole Faust }
377*c217d954SCole Faust 
cpu_model() const378*c217d954SCole Faust CpuModel CpuInfo::cpu_model() const
379*c217d954SCole Faust {
380*c217d954SCole Faust #if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__))
381*c217d954SCole Faust     return cpu_model(0);
382*c217d954SCole Faust #else  /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
383*c217d954SCole Faust     return cpu_model(sched_getcpu());
384*c217d954SCole Faust #endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
385*c217d954SCole Faust }
386*c217d954SCole Faust 
num_cpus() const387*c217d954SCole Faust uint32_t CpuInfo::num_cpus() const
388*c217d954SCole Faust {
389*c217d954SCole Faust     return _cpus.size();
390*c217d954SCole Faust }
391*c217d954SCole Faust 
num_threads_hint()392*c217d954SCole Faust uint32_t num_threads_hint()
393*c217d954SCole Faust {
394*c217d954SCole Faust     unsigned int num_threads_hint = 1;
395*c217d954SCole Faust 
396*c217d954SCole Faust #if !defined(BARE_METAL) && !defined(_WIN64)
397*c217d954SCole Faust     std::vector<std::string> cpus;
398*c217d954SCole Faust     cpus.reserve(64);
399*c217d954SCole Faust 
400*c217d954SCole Faust     // CPU part regex
401*c217d954SCole Faust     regex_t cpu_part_rgx;
402*c217d954SCole Faust     memset(&cpu_part_rgx, 0, sizeof(regex_t));
403*c217d954SCole Faust     int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED);
404*c217d954SCole Faust     ARM_COMPUTE_UNUSED(ret_status);
405*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
406*c217d954SCole Faust 
407*c217d954SCole Faust     // Read cpuinfo and get occurrence of each core
408*c217d954SCole Faust     std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in);
409*c217d954SCole Faust     if(cpuinfo_file.is_open())
410*c217d954SCole Faust     {
411*c217d954SCole Faust         std::string line;
412*c217d954SCole Faust         while(bool(getline(cpuinfo_file, line)))
413*c217d954SCole Faust         {
414*c217d954SCole Faust             std::array<regmatch_t, 2> match;
415*c217d954SCole Faust             if(regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0)
416*c217d954SCole Faust             {
417*c217d954SCole Faust                 cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)));
418*c217d954SCole Faust             }
419*c217d954SCole Faust         }
420*c217d954SCole Faust     }
421*c217d954SCole Faust     regfree(&cpu_part_rgx);
422*c217d954SCole Faust 
423*c217d954SCole Faust     // Get min number of threads
424*c217d954SCole Faust     std::sort(std::begin(cpus), std::end(cpus));
425*c217d954SCole Faust     auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t
426*c217d954SCole Faust     {
427*c217d954SCole Faust         std::unordered_map<std::string, uint32_t> cpus_freq;
428*c217d954SCole Faust         for(const auto &cpu : cpus)
429*c217d954SCole Faust         {
430*c217d954SCole Faust             cpus_freq[cpu]++;
431*c217d954SCole Faust         }
432*c217d954SCole Faust 
433*c217d954SCole Faust         uint32_t vmin = cpus.size() + 1;
434*c217d954SCole Faust         for(const auto &cpu_freq : cpus_freq)
435*c217d954SCole Faust         {
436*c217d954SCole Faust             vmin = std::min(vmin, cpu_freq.second);
437*c217d954SCole Faust         }
438*c217d954SCole Faust         return vmin;
439*c217d954SCole Faust     };
440*c217d954SCole Faust 
441*c217d954SCole Faust     // Set thread hint
442*c217d954SCole Faust     num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus);
443*c217d954SCole Faust #endif /* !defined(BARE_METAL) */
444*c217d954SCole Faust 
445*c217d954SCole Faust     return num_threads_hint;
446*c217d954SCole Faust }
447*c217d954SCole Faust } // namespace cpuinfo
448*c217d954SCole Faust } // namespace arm_compute
449