// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/base/internal/sysinfo.h"

#include "absl/base/attributes.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <fcntl.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif

#ifdef __linux__
#include <sys/syscall.h>
#endif

#if defined(__APPLE__) || defined(__FreeBSD__)
#include <sys/sysctl.h>
#endif

#ifdef __FreeBSD__
#include <pthread_np.h>
#endif

#ifdef __NetBSD__
#include <lwp.h>
#endif

#if defined(__myriad2__)
#include <rtems.h>
#endif

#if defined(__Fuchsia__)
#include <zircon/process.h>
#endif

#include <string.h>

#include <cassert>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <limits>
#include <thread>  // NOLINT(build/c++11)
#include <utility>
#include <vector>

#include "absl/base/call_once.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"
#include "absl/base/internal/unscaledcycleclock.h"
#include "absl/base/thread_annotations.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace base_internal {

namespace {

#if defined(_WIN32)

// Returns the number of bits set in `bitMask`.
DWORD Win32CountSetBits(ULONG_PTR bitMask) {
  for (DWORD bitSetCount = 0; ; ++bitSetCount) {
    if (bitMask == 0) return bitSetCount;
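    // `bitMask & (bitMask - 1)` clears the lowest set bit (Kernighan's
    // trick), so the loop body runs exactly once per set bit.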
    bitMask &= bitMask - 1;
  }
}

// Returns the number of logical CPUs using GetLogicalProcessorInformation(), or
// 0 if the number of processors is not available or cannot be computed.
// https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation
int Win32NumCPUs() {
#pragma comment(lib, "kernel32.lib")
  using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;

  DWORD info_size = sizeof(Info);
  Info* info(static_cast<Info*>(malloc(info_size)));
  if (info == nullptr) return 0;

  bool success = GetLogicalProcessorInformation(info, &info_size);
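  // When the call fails with ERROR_INSUFFICIENT_BUFFER, it has updated
  // info_size to the required byte count, so retry once with a buffer of
  // exactly that size.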
  if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
    free(info);
    info = static_cast<Info*>(malloc(info_size));
    if (info == nullptr) return 0;
    success = GetLogicalProcessorInformation(info, &info_size);
  }

  DWORD logicalProcessorCount = 0;
  if (success) {
    Info* ptr = info;
    DWORD byteOffset = 0;
    while (byteOffset + sizeof(Info) <= info_size) {
      switch (ptr->Relationship) {
        case RelationProcessorCore:
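          // ProcessorMask has one bit set per logical processor on this core,
          // so summing set bits across all cores counts logical CPUs,
          // including SMT/hyperthreaded siblings.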
          logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask);
          break;

        case RelationNumaNode:
        case RelationCache:
        case RelationProcessorPackage:
          // Ignore other entries
          break;

        default:
          // Ignore unknown entries
          break;
      }
      byteOffset += sizeof(Info);
      ptr++;
    }
  }
  free(info);
  return static_cast<int>(logicalProcessorCount);
}

#endif

}  // namespace

static int GetNumCPUs() {
#if defined(__myriad2__)
  return 1;
#elif defined(_WIN32)
  const int hardware_concurrency = Win32NumCPUs();
  return hardware_concurrency ? hardware_concurrency : 1;
#elif defined(_AIX)
  return sysconf(_SC_NPROCESSORS_ONLN);
#else
  // Other possibilities:
  //  - Read /sys/devices/system/cpu/online and use cpumask_parse()
  //  - sysconf(_SC_NPROCESSORS_ONLN)
  return static_cast<int>(std::thread::hardware_concurrency());
#endif
}

#if defined(_WIN32)

static double GetNominalCPUFrequency() {
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
    !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
  // UWP apps don't have access to the registry, and there is currently no API
  // that reports the nominal CPU frequency.
  return 1.0;
#else
#pragma comment(lib, "advapi32.lib")  // For Reg* functions.
  HKEY key;
  // Use the Reg* functions rather than the SH functions because shlwapi.dll
  // pulls in gdi32.dll, which makes process destruction much more costly.
  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
                    "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0,
                    KEY_READ, &key) == ERROR_SUCCESS) {
    DWORD type = 0;
    DWORD data = 0;
    DWORD data_size = sizeof(data);
    auto result = RegQueryValueExA(key, "~MHz", nullptr, &type,
                                   reinterpret_cast<LPBYTE>(&data), &data_size);
    RegCloseKey(key);
    if (result == ERROR_SUCCESS && type == REG_DWORD &&
        data_size == sizeof(data)) {
      return data * 1e6;  // Value is MHz.
    }
  }
  return 1.0;
#endif  // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP
}

#elif defined(CTL_HW) && defined(HW_CPU_FREQ)

static double GetNominalCPUFrequency() {
  unsigned freq;
  size_t size = sizeof(freq);
  int mib[2] = {CTL_HW, HW_CPU_FREQ};
  if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) {
    return static_cast<double>(freq);
  }
  return 1.0;
}

#else

// Helper function for reading a long from a file. Returns true on success, in
// which case the memory location pointed to by `value` holds the value read.
static bool ReadLongFromFile(const char *file, long *value) {
  bool ret = false;
#if defined(_POSIX_C_SOURCE)
  const int file_mode = (O_RDONLY | O_CLOEXEC);
#else
  const int file_mode = O_RDONLY;
#endif

  int fd = open(file, file_mode);
  if (fd != -1) {
    char line[1024];
    char *err;
    memset(line, '\0', sizeof(line));
    ssize_t len;
    do {
      len = read(fd, line, sizeof(line) - 1);
    } while (len < 0 && errno == EINTR);
    if (len <= 0) {
      ret = false;
    } else {
      const long temp_value = strtol(line, &err, 10);
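      // Accept the parse only if the buffer was non-empty and strtol consumed
      // everything up to a trailing newline or the end of the buffer (`err`
      // points one past the last character consumed).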
      if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
        *value = temp_value;
        ret = true;
      }
    }
    close(fd);
  }
  return ret;
}

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)

// Reads a monotonic time source and returns a value in
// nanoseconds. The returned value uses an arbitrary epoch, not the
// Unix epoch.
static int64_t ReadMonotonicClockNanos() {
  struct timespec t;
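  // Prefer CLOCK_MONOTONIC_RAW where available: unlike CLOCK_MONOTONIC, it is
  // not subject to NTP rate adjustment, so its tick rate tracks the hardware
  // clock more faithfully.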
#ifdef CLOCK_MONOTONIC_RAW
  int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
#else
  int rc = clock_gettime(CLOCK_MONOTONIC, &t);
#endif
  if (rc != 0) {
    ABSL_INTERNAL_LOG(
        FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")");
  }
  return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec;
}

class UnscaledCycleClockWrapperForInitializeFrequency {
 public:
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};

struct TimeTscPair {
  int64_t time;  // From ReadMonotonicClockNanos().
  int64_t tsc;   // From UnscaledCycleClock::Now().
};

// Returns a pair of values (monotonic kernel time, TSC ticks) that
// approximately correspond to each other.  This is accomplished by
// doing several reads and picking the reading with the lowest
// latency.  This approach is used to minimize the probability that
// our thread was preempted between clock reads.
static TimeTscPair GetTimeTscPair() {
  int64_t best_latency = std::numeric_limits<int64_t>::max();
  TimeTscPair best;
  for (int i = 0; i < 10; ++i) {
    int64_t t0 = ReadMonotonicClockNanos();
    int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now();
    int64_t t1 = ReadMonotonicClockNanos();
    int64_t latency = t1 - t0;
    if (latency < best_latency) {
      best_latency = latency;
      best.time = t0;
      best.tsc = tsc;
    }
  }
  return best;
}

// Measures and returns the TSC frequency by taking a pair of
// measurements approximately `sleep_nanoseconds` apart.
static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) {
  auto t0 = GetTimeTscPair();
  struct timespec ts;
  ts.tv_sec = 0;
  ts.tv_nsec = sleep_nanoseconds;
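  // Passing `ts` as both arguments makes nanosleep write the unslept time
  // back into `ts` on EINTR, so the loop resumes the remaining sleep rather
  // than restarting the full interval.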
  while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {}
  auto t1 = GetTimeTscPair();
  double elapsed_ticks = t1.tsc - t0.tsc;
  double elapsed_time = (t1.time - t0.time) * 1e-9;
  return elapsed_ticks / elapsed_time;
}

// Measures and returns the TSC frequency by calling
// MeasureTscFrequencyWithSleep(), doubling the sleep interval until the
// frequency measurement stabilizes.
static double MeasureTscFrequency() {
  double last_measurement = -1.0;
  int sleep_nanoseconds = 1000000;  // 1 millisecond.
  for (int i = 0; i < 8; ++i) {
    double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds);
    if (measurement * 0.99 < last_measurement &&
        last_measurement < measurement * 1.01) {
      // Use the current measurement if it is within 1% of the
      // previous measurement.
      return measurement;
    }
    last_measurement = measurement;
    sleep_nanoseconds *= 2;
  }
  return last_measurement;
}

#endif  // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY

static double GetNominalCPUFrequency() {
  long freq = 0;

  // Google's production kernel has a patch to export the TSC
  // frequency through sysfs. If the kernel is exporting the TSC
  // frequency, use that. There are issues where cpuinfo_max_freq
  // cannot be relied on because the BIOS may be exporting an invalid
  // p-state (on x86) or p-states may be used to put the processor in
  // a new mode (turbo mode). Essentially, those frequencies cannot
  // always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  // On these platforms, the TSC frequency is the nominal CPU
  // frequency.  But without having the kernel export it directly
  // through /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  // other way to reliably get the TSC frequency, so we have to
  // measure it ourselves.  Some CPUs abuse cpuinfo_max_freq by
  // exporting "fake" frequencies for implementing new features. For
  // example, Intel's turbo mode is enabled by exposing a p-state
  // value with a higher frequency than that of the real TSC
  // rate. Because of this, we prefer to measure the TSC rate
  // ourselves on i386 and x86-64.
  return MeasureTscFrequency();
#else

  // If CPU scaling is in effect, we want to use the *maximum*
  // frequency, not whatever CPU speed some random processor happens
  // to be using now.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

  return 1.0;
#endif  // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
}

#endif

ABSL_CONST_INIT static once_flag init_num_cpus_once;
ABSL_CONST_INIT static int num_cpus = 0;

// NumCPUs() may be called before main() and before malloc is properly
// initialized; therefore, this must not allocate memory.
int NumCPUs() {
  base_internal::LowLevelCallOnce(
      &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  return num_cpus;
}

// A default frequency of 0.0 might be dangerous if it is used in division.
ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;

// NominalCPUFrequency() may be called before main() and before malloc is
// properly initialized; therefore, this must not allocate memory.
double NominalCPUFrequency() {
  base_internal::LowLevelCallOnce(
      &init_nominal_cpu_frequency_once,
      []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  return nominal_cpu_frequency;
}

#if defined(_WIN32)

pid_t GetTID() {
  return pid_t{GetCurrentThreadId()};
}

#elif defined(__linux__)

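// Not every set of libc headers defines SYS_gettid; in that case, fall back
// to the kernel's raw __NR_gettid syscall number.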
#ifndef SYS_gettid
#define SYS_gettid __NR_gettid
#endif

pid_t GetTID() {
  return static_cast<pid_t>(syscall(SYS_gettid));
}

#elif defined(__akaros__)

pid_t GetTID() {
  // Akaros has a concept of "vcore context", which is the state the program
  // is forced into when we need to make a user-level scheduling decision, or
  // run a signal handler.  This is analogous to the interrupt context that a
  // CPU might enter if it encounters some kind of exception.
  //
  // There is no current thread context in vcore context, but we need to give
  // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  // Thread 0 always exists, so if we are in vcore context, we return that.
  //
  // Otherwise, we know (since we are using pthreads) that the uthread struct
  // current_uthread is pointing to is the first element of a
  // struct pthread_tcb, so we extract and return the thread ID from that.
  //
  // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  // structure at some point. We should modify this code to remove the cast
  // when that happens.
  if (in_vcore_context())
    return 0;
  return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
}

#elif defined(__myriad2__)

pid_t GetTID() {
  uint32_t tid;
  rtems_task_ident(RTEMS_SELF, 0, &tid);
  return tid;
}

#elif defined(__APPLE__)

pid_t GetTID() {
  uint64_t tid;
  // `nullptr` here implies this thread.  This only fails if the specified
  // thread is invalid or the pointer-to-tid is null, so we needn't worry about
  // it.
  pthread_threadid_np(nullptr, &tid);
  return static_cast<pid_t>(tid);
}

#elif defined(__FreeBSD__)

pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }

#elif defined(__OpenBSD__)

pid_t GetTID() { return getthrid(); }

#elif defined(__NetBSD__)

pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }

#elif defined(__native_client__)

pid_t GetTID() {
  auto* thread = pthread_self();
  static_assert(sizeof(pid_t) == sizeof(thread),
                "In NaCl, pid_t is expected to be the same size as a pointer");
  return reinterpret_cast<pid_t>(thread);
}

#elif defined(__Fuchsia__)

pid_t GetTID() {
  // Use our thread handle as the TID, which should be unique within this
  // process (but may not be globally unique). The handle value was chosen over
  // a kernel object ID (KOID) because zx_handle_t (32-bits) can be cast to a
  // pid_t type without loss of precision, but a zx_koid_t (64-bits) cannot.
  return static_cast<pid_t>(zx_thread_self());
}

#else

// Fallback implementation of `GetTID` using `pthread_self`.
pid_t GetTID() {
  // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't
  // should be handled above.
  return static_cast<pid_t>(pthread_self());
}

#endif

// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls: a cached lookup
// takes roughly 1ns, versus roughly 98ns for the underlying system call.
pid_t GetCachedTID() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  static thread_local pid_t thread_id = GetTID();
  return thread_id;
#else
  return GetTID();
#endif  // ABSL_HAVE_THREAD_LOCAL
}

}  // namespace base_internal
ABSL_NAMESPACE_END
}  // namespace absl