1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/base/internal/sysinfo.h"
16
17 #include "absl/base/attributes.h"
18
19 #ifdef _WIN32
20 #include <windows.h>
21 #else
22 #include <fcntl.h>
23 #include <pthread.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 #endif
28
29 #ifdef __linux__
30 #include <sys/syscall.h>
31 #endif
32
33 #if defined(__APPLE__) || defined(__FreeBSD__)
34 #include <sys/sysctl.h>
35 #endif
36
37 #ifdef __FreeBSD__
38 #include <pthread_np.h>
39 #endif
40
41 #ifdef __NetBSD__
42 #include <lwp.h>
43 #endif
44
45 #if defined(__myriad2__)
46 #include <rtems.h>
47 #endif
48
49 #if defined(__Fuchsia__)
50 #include <zircon/process.h>
51 #endif
52
53 #include <string.h>
54
55 #include <cassert>
56 #include <cerrno>
57 #include <cstdint>
58 #include <cstdio>
59 #include <cstdlib>
60 #include <ctime>
61 #include <limits>
62 #include <thread> // NOLINT(build/c++11)
63 #include <utility>
64 #include <vector>
65
66 #include "absl/base/call_once.h"
67 #include "absl/base/config.h"
68 #include "absl/base/internal/raw_logging.h"
69 #include "absl/base/internal/spinlock.h"
70 #include "absl/base/internal/unscaledcycleclock.h"
71 #include "absl/base/thread_annotations.h"
72
73 namespace absl {
74 ABSL_NAMESPACE_BEGIN
75 namespace base_internal {
76
77 namespace {
78
79 #if defined(_WIN32)
80
81 // Returns number of bits set in `bitMask`
Win32CountSetBits(ULONG_PTR bitMask)82 DWORD Win32CountSetBits(ULONG_PTR bitMask) {
83 for (DWORD bitSetCount = 0; ; ++bitSetCount) {
84 if (bitMask == 0) return bitSetCount;
85 bitMask &= bitMask - 1;
86 }
87 }
88
// Returns the number of logical CPUs using GetLogicalProcessorInformation(), or
// 0 if the number of processors is not available or can not be computed.
// https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation
int Win32NumCPUs() {
#pragma comment(lib, "kernel32.lib")
  using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;

  DWORD info_size = sizeof(Info);
  Info* info(static_cast<Info*>(malloc(info_size)));
  if (info == nullptr) return 0;

  bool success = GetLogicalProcessorInformation(info, &info_size);
  if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
    // Common path: the first call reports the required buffer size via
    // `info_size`; reallocate to that size and retry once.
    free(info);
    info = static_cast<Info*>(malloc(info_size));
    if (info == nullptr) return 0;
    success = GetLogicalProcessorInformation(info, &info_size);
  }

  DWORD logicalProcessorCount = 0;
  if (success) {
    Info* ptr = info;
    DWORD byteOffset = 0;
    // Walk the array of Info records. Each RelationProcessorCore record
    // carries a ProcessorMask with one bit per logical processor on that
    // core, so summing set bits yields the logical CPU count.
    while (byteOffset + sizeof(Info) <= info_size) {
      switch (ptr->Relationship) {
        case RelationProcessorCore:
          logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask);
          break;

        case RelationNumaNode:
        case RelationCache:
        case RelationProcessorPackage:
          // Ignore other entries
          break;

        default:
          // Ignore unknown entries
          break;
      }
      byteOffset += sizeof(Info);
      ptr++;
    }
  }
  free(info);
  return static_cast<int>(logicalProcessorCount);
}
135
136 #endif
137
138 } // namespace
139
// Computes the number of logical CPUs for the current platform.
// Called exactly once (see NumCPUs() below).
static int GetNumCPUs() {
#if defined(__myriad2__)
  // Myriad2 is treated as a single-CPU target.
  return 1;
#elif defined(_WIN32)
  const int hardware_concurrency = Win32NumCPUs();
  // Win32NumCPUs() returns 0 on failure; never report fewer than 1 CPU.
  return hardware_concurrency ? hardware_concurrency : 1;
#elif defined(_AIX)
  return sysconf(_SC_NPROCESSORS_ONLN);
#else
  // Other possibilities:
  // - Read /sys/devices/system/cpu/online and use cpumask_parse()
  // - sysconf(_SC_NPROCESSORS_ONLN)
  return static_cast<int>(std::thread::hardware_concurrency());
#endif
}
155
156 #if defined(_WIN32)
157
// Returns the nominal CPU frequency in Hz as reported by the Windows
// registry, or 1.0 if it cannot be determined.
static double GetNominalCPUFrequency() {
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
    !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
  // UWP apps don't have access to the registry and currently don't provide an
  // API informing about CPU nominal frequency.
  return 1.0;
#else
#pragma comment(lib, "advapi32.lib")  // For Reg* functions.
  HKEY key;
  // Use the Reg* functions rather than the SH functions because shlwapi.dll
  // pulls in gdi32.dll which makes process destruction much more costly.
  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
                    "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0,
                    KEY_READ, &key) == ERROR_SUCCESS) {
    DWORD type = 0;
    DWORD data = 0;
    DWORD data_size = sizeof(data);
    auto result = RegQueryValueExA(key, "~MHz", nullptr, &type,
                                   reinterpret_cast<LPBYTE>(&data), &data_size);
    RegCloseKey(key);
    // Only trust the value if it has the expected registry type and size.
    if (result == ERROR_SUCCESS && type == REG_DWORD &&
        data_size == sizeof(data)) {
      return data * 1e6;  // Value is MHz.
    }
  }
  // Key or value unavailable: fall back to a safe non-zero default.
  return 1.0;
#endif  // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP
}
186
187 #elif defined(CTL_HW) && defined(HW_CPU_FREQ)
188
GetNominalCPUFrequency()189 static double GetNominalCPUFrequency() {
190 unsigned freq;
191 size_t size = sizeof(freq);
192 int mib[2] = {CTL_HW, HW_CPU_FREQ};
193 if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) {
194 return static_cast<double>(freq);
195 }
196 return 1.0;
197 }
198
199 #else
200
// Helper function for reading a long from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
static bool ReadLongFromFile(const char *file, long *value) {
#if defined(_POSIX_C_SOURCE)
  const int file_mode = (O_RDONLY | O_CLOEXEC);
#else
  const int file_mode = O_RDONLY;
#endif

  const int fd = open(file, file_mode);
  if (fd == -1) return false;

  // Zero-fill so the buffer is always NUL-terminated for strtol().
  char buf[1024];
  memset(buf, '\0', sizeof(buf));
  ssize_t bytes_read;
  do {
    bytes_read = read(fd, buf, sizeof(buf) - 1);
  } while (bytes_read < 0 && errno == EINTR);  // Retry interrupted reads.
  close(fd);

  if (bytes_read <= 0) return false;

  char *end = nullptr;
  const long parsed = strtol(buf, &end, 10);
  // Accept only if the file was non-empty and the number is terminated by
  // end-of-string or a trailing newline.
  if (buf[0] == '\0' || (*end != '\n' && *end != '\0')) return false;
  *value = parsed;
  return true;
}
233
234 #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
235
236 // Reads a monotonic time source and returns a value in
237 // nanoseconds. The returned value uses an arbitrary epoch, not the
238 // Unix epoch.
ReadMonotonicClockNanos()239 static int64_t ReadMonotonicClockNanos() {
240 struct timespec t;
241 #ifdef CLOCK_MONOTONIC_RAW
242 int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
243 #else
244 int rc = clock_gettime(CLOCK_MONOTONIC, &t);
245 #endif
246 if (rc != 0) {
247 ABSL_INTERNAL_LOG(
248 FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")");
249 }
250 return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec;
251 }
252
// Thin wrapper giving the frequency-measurement code below access to the
// platform's unscaled cycle counter.
class UnscaledCycleClockWrapperForInitializeFrequency {
 public:
  // Returns the current raw cycle-counter reading.
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};
257
// A paired reading of the monotonic clock and the cycle counter, taken as
// close together in time as we can manage.
struct TimeTscPair {
  int64_t time;  // From ReadMonotonicClockNanos().
  int64_t tsc;   // From UnscaledCycleClock::Now().
};
262
263 // Returns a pair of values (monotonic kernel time, TSC ticks) that
264 // approximately correspond to each other. This is accomplished by
265 // doing several reads and picking the reading with the lowest
266 // latency. This approach is used to minimize the probability that
267 // our thread was preempted between clock reads.
GetTimeTscPair()268 static TimeTscPair GetTimeTscPair() {
269 int64_t best_latency = std::numeric_limits<int64_t>::max();
270 TimeTscPair best;
271 for (int i = 0; i < 10; ++i) {
272 int64_t t0 = ReadMonotonicClockNanos();
273 int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now();
274 int64_t t1 = ReadMonotonicClockNanos();
275 int64_t latency = t1 - t0;
276 if (latency < best_latency) {
277 best_latency = latency;
278 best.time = t0;
279 best.tsc = tsc;
280 }
281 }
282 return best;
283 }
284
285 // Measures and returns the TSC frequency by taking a pair of
286 // measurements approximately `sleep_nanoseconds` apart.
MeasureTscFrequencyWithSleep(int sleep_nanoseconds)287 static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) {
288 auto t0 = GetTimeTscPair();
289 struct timespec ts;
290 ts.tv_sec = 0;
291 ts.tv_nsec = sleep_nanoseconds;
292 while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {}
293 auto t1 = GetTimeTscPair();
294 double elapsed_ticks = t1.tsc - t0.tsc;
295 double elapsed_time = (t1.time - t0.time) * 1e-9;
296 return elapsed_ticks / elapsed_time;
297 }
298
299 // Measures and returns the TSC frequency by calling
300 // MeasureTscFrequencyWithSleep(), doubling the sleep interval until the
301 // frequency measurement stabilizes.
MeasureTscFrequency()302 static double MeasureTscFrequency() {
303 double last_measurement = -1.0;
304 int sleep_nanoseconds = 1000000; // 1 millisecond.
305 for (int i = 0; i < 8; ++i) {
306 double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds);
307 if (measurement * 0.99 < last_measurement &&
308 last_measurement < measurement * 1.01) {
309 // Use the current measurement if it is within 1% of the
310 // previous measurement.
311 return measurement;
312 }
313 last_measurement = measurement;
314 sleep_nanoseconds *= 2;
315 }
316 return last_measurement;
317 }
318
319 #endif // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
320
// Returns the nominal CPU frequency in Hz, preferring kernel-exported TSC
// frequency, then (per platform) a direct TSC measurement or the cpufreq
// maximum, falling back to 1.0.
static double GetNominalCPUFrequency() {
  long freq = 0;

  // Google's production kernel has a patch to export the TSC
  // frequency through sysfs. If the kernel is exporting the TSC
  // frequency use that. There are issues where cpuinfo_max_freq
  // cannot be relied on because the BIOS may be exporting an invalid
  // p-state (on x86) or p-states may be used to put the processor in
  // a new mode (turbo mode). Essentially, those frequencies cannot
  // always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  // On these platforms, the TSC frequency is the nominal CPU
  // frequency. But without having the kernel export it directly
  // though /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  // other way to reliably get the TSC frequency, so we have to
  // measure it ourselves. Some CPUs abuse cpuinfo_max_freq by
  // exporting "fake" frequencies for implementing new features. For
  // example, Intel's turbo mode is enabled by exposing a p-state
  // value with a higher frequency than that of the real TSC
  // rate. Because of this, we prefer to measure the TSC rate
  // ourselves on i386 and x86-64.
  return MeasureTscFrequency();
#else

  // If CPU scaling is in effect, we want to use the *maximum*
  // frequency, not whatever CPU speed some random processor happens
  // to be using now.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

  // Nothing worked; return a safe non-zero default (see comment above
  // NominalCPUFrequency()'s init below about division safety).
  return 1.0;
#endif  // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
}
361
362 #endif
363
// Once-guard and cached value for NumCPUs(); constant-initialized so they
// are usable before dynamic initialization runs.
ABSL_CONST_INIT static once_flag init_num_cpus_once;
ABSL_CONST_INIT static int num_cpus = 0;

// NumCPUs() may be called before main() and before malloc is properly
// initialized, therefore this must not allocate memory.
int NumCPUs() {
  // Compute the CPU count exactly once; all later calls return the cached
  // value.
  base_internal::LowLevelCallOnce(
      &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  return num_cpus;
}
374
// A default frequency of 0.0 might be dangerous if it is used in division.
ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;

// NominalCPUFrequency() may be called before main() and before malloc is
// properly initialized, therefore this must not allocate memory.
double NominalCPUFrequency() {
  // Compute the frequency exactly once; all later calls return the cached
  // value.
  base_internal::LowLevelCallOnce(
      &init_nominal_cpu_frequency_once,
      []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  return nominal_cpu_frequency;
}
387
#if defined(_WIN32)

// Windows: the OS thread ID comes straight from the API.
pid_t GetTID() {
  return pid_t{GetCurrentThreadId()};
}

#elif defined(__linux__)

#ifndef SYS_gettid
#define SYS_gettid __NR_gettid
#endif

// Linux: invoke the gettid syscall directly (SYS_gettid is defined above
// from __NR_gettid when the libc headers do not provide it).
pid_t GetTID() {
  return static_cast<pid_t>(syscall(SYS_gettid));
}

#elif defined(__akaros__)

pid_t GetTID() {
  // Akaros has a concept of "vcore context", which is the state the program
  // is forced into when we need to make a user-level scheduling decision, or
  // run a signal handler. This is analogous to the interrupt context that a
  // CPU might enter if it encounters some kind of exception.
  //
  // There is no current thread context in vcore context, but we need to give
  // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  // Thread 0 always exists, so if we are in vcore context, we return that.
  //
  // Otherwise, we know (since we are using pthreads) that the uthread struct
  // current_uthread is pointing to is the first element of a
  // struct pthread_tcb, so we extract and return the thread ID from that.
  //
  // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  // structure at some point. We should modify this code to remove the cast
  // when that happens.
  if (in_vcore_context())
    return 0;
  return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
}

#elif defined(__myriad2__)

// RTEMS (Myriad2): ask the kernel for the identifier of the calling task.
// NOTE(review): the rtems_task_ident() return status is ignored here —
// presumably it cannot fail for RTEMS_SELF; confirm against the RTEMS docs.
pid_t GetTID() {
  uint32_t tid;
  rtems_task_ident(RTEMS_SELF, 0, &tid);
  return tid;
}

#elif defined(__APPLE__)

pid_t GetTID() {
  uint64_t tid;
  // `nullptr` here implies this thread. This only fails if the specified
  // thread is invalid or the pointer-to-tid is null, so we needn't worry about
  // it.
  pthread_threadid_np(nullptr, &tid);
  return static_cast<pid_t>(tid);
}

#elif defined(__FreeBSD__)

// FreeBSD: thread ID via the pthread_np.h extension.
pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }

#elif defined(__OpenBSD__)

// OpenBSD: kernel thread ID of the calling thread.
pid_t GetTID() { return getthrid(); }

#elif defined(__NetBSD__)

// NetBSD: the light-weight process (LWP) ID serves as the thread ID.
pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }

#elif defined(__native_client__)

// NaCl: reinterpret the pthread handle as the thread ID; the static_assert
// guards the assumption that it fits in a pid_t.
pid_t GetTID() {
  auto* thread = pthread_self();
  static_assert(sizeof(pid_t) == sizeof(thread),
                "In NaCL int expected to be the same size as a pointer");
  return reinterpret_cast<pid_t>(thread);
}

#elif defined(__Fuchsia__)

pid_t GetTID() {
  // Use our thread handle as the TID, which should be unique within this
  // process (but may not be globally unique). The handle value was chosen over
  // a kernel object ID (KOID) because zx_handle_t (32-bits) can be cast to a
  // pid_t type without loss of precision, but a zx_koid_t (64-bits) cannot.
  return static_cast<pid_t>(zx_thread_self());
}

#else

// Fallback implementation of `GetTID` using `pthread_self`.
pid_t GetTID() {
  // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't
  // should be handled above.
  return static_cast<pid_t>(pthread_self());
}

#endif
488
// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls. Without this caching,
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
pid_t GetCachedTID() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  // The first call on each thread pays for one GetTID(); subsequent calls
  // read the thread-local copy.
  static thread_local pid_t thread_id = GetTID();
  return thread_id;
#else
  // No thread_local support: fall back to the uncached system call.
  return GetTID();
#endif  // ABSL_HAVE_THREAD_LOCAL
}
500
501 } // namespace base_internal
502 ABSL_NAMESPACE_END
503 } // namespace absl
504