// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/base/internal/sysinfo.h"

#include "absl/base/attributes.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <fcntl.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif

#ifdef __linux__
#include <sys/syscall.h>
#endif

#if defined(__APPLE__) || defined(__FreeBSD__)
#include <sys/sysctl.h>
#endif

#ifdef __FreeBSD__
#include <pthread_np.h>
#endif

#ifdef __NetBSD__
#include <lwp.h>
#endif

#if defined(__myriad2__)
#include <rtems.h>
#endif

#include <string.h>

#include <cassert>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <limits>
#include <thread>  // NOLINT(build/c++11)
#include <utility>
#include <vector>

#include "absl/base/call_once.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"
#include "absl/base/internal/unscaledcycleclock.h"
#include "absl/base/thread_annotations.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace base_internal {

namespace {

#if defined(_WIN32)

// Returns the number of bits set in `bitMask`.
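// Uses Kernighan's trick: `bitMask &= bitMask - 1` clears the lowest set bit,
// so the loop below iterates once per set bit.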
DWORD Win32CountSetBits(ULONG_PTR bitMask) {
  for (DWORD bitSetCount = 0; ; ++bitSetCount) {
    if (bitMask == 0) return bitSetCount;
    bitMask &= bitMask - 1;
  }
}

// Returns the number of logical CPUs using GetLogicalProcessorInformation(),
// or 0 if the number of processors is not available or cannot be computed.
// https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation
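// The first call below supplies room for a single entry; if it fails with
// ERROR_INSUFFICIENT_BUFFER, `info_size` has been updated to the required
// size, and the call is retried once with a correctly sized buffer.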
int Win32NumCPUs() {
#pragma comment(lib, "kernel32.lib")
  using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;

  DWORD info_size = sizeof(Info);
  Info* info(static_cast<Info*>(malloc(info_size)));
  if (info == nullptr) return 0;

  bool success = GetLogicalProcessorInformation(info, &info_size);
  if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
    free(info);
    info = static_cast<Info*>(malloc(info_size));
    if (info == nullptr) return 0;
    success = GetLogicalProcessorInformation(info, &info_size);
  }

  DWORD logicalProcessorCount = 0;
  if (success) {
    Info* ptr = info;
    DWORD byteOffset = 0;
    while (byteOffset + sizeof(Info) <= info_size) {
      switch (ptr->Relationship) {
        case RelationProcessorCore:
          logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask);
          break;

        case RelationNumaNode:
        case RelationCache:
        case RelationProcessorPackage:
          // Ignore other entries.
          break;

        default:
          // Ignore unknown entries.
          break;
      }
      byteOffset += sizeof(Info);
      ptr++;
    }
  }
  free(info);
  return static_cast<int>(logicalProcessorCount);
}

#endif

}  // namespace

static int GetNumCPUs() {
#if defined(__myriad2__)
  return 1;
#elif defined(_WIN32)
  const int hardware_concurrency = Win32NumCPUs();
  return hardware_concurrency ? hardware_concurrency : 1;
#elif defined(_AIX)
  return static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN));
#else
  // Other possibilities:
  //  - Read /sys/devices/system/cpu/online and use cpumask_parse()
  //  - sysconf(_SC_NPROCESSORS_ONLN)
  return static_cast<int>(std::thread::hardware_concurrency());
#endif
}

#if defined(_WIN32)

static double GetNominalCPUFrequency() {
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
    !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
  // UWP apps don't have access to the registry and currently don't provide an
  // API informing about CPU nominal frequency.
  return 1.0;
#else
#pragma comment(lib, "advapi32.lib")  // For Reg* functions.
  HKEY key;
  // Use the Reg* functions rather than the SH functions because shlwapi.dll
  // pulls in gdi32.dll, which makes process destruction much more costly.
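  // The registry's "~MHz" value read below stores the nominal frequency in
  // whole MHz (e.g. 2400 for a 2.4 GHz part), hence the 1e6 scaling.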
  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
                    "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0,
                    KEY_READ, &key) == ERROR_SUCCESS) {
    DWORD type = 0;
    DWORD data = 0;
    DWORD data_size = sizeof(data);
    auto result = RegQueryValueExA(key, "~MHz", nullptr, &type,
                                   reinterpret_cast<LPBYTE>(&data), &data_size);
    RegCloseKey(key);
    if (result == ERROR_SUCCESS && type == REG_DWORD &&
        data_size == sizeof(data)) {
      return data * 1e6;  // Value is MHz.
    }
  }
  return 1.0;
#endif  // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP
}

#elif defined(CTL_HW) && defined(HW_CPU_FREQ)

static double GetNominalCPUFrequency() {
  unsigned freq;
  size_t size = sizeof(freq);
  int mib[2] = {CTL_HW, HW_CPU_FREQ};
  if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) {
    return static_cast<double>(freq);
  }
  return 1.0;
}

#else

// Helper function for reading a long from a file. Returns true if successful,
// in which case the memory location pointed to by `value` holds the value
// read.
static bool ReadLongFromFile(const char *file, long *value) {
  bool ret = false;
#if defined(_POSIX_C_SOURCE)
  const int file_mode = (O_RDONLY | O_CLOEXEC);
#else
  const int file_mode = O_RDONLY;
#endif

  int fd = open(file, file_mode);
  if (fd != -1) {
    char line[1024];
    char *err;
    memset(line, '\0', sizeof(line));
    ssize_t len;
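    // Retry the read if it is interrupted by a signal before any data
    // arrives (EINTR); any other error or EOF is handled below.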
    do {
      len = read(fd, line, sizeof(line) - 1);
    } while (len < 0 && errno == EINTR);
    if (len <= 0) {
      ret = false;
    } else {
      const long temp_value = strtol(line, &err, 10);
      if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
        *value = temp_value;
        ret = true;
      }
    }
    close(fd);
  }
  return ret;
}

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)

// Reads a monotonic time source and returns a value in
// nanoseconds. The returned value uses an arbitrary epoch, not the
// Unix epoch.
static int64_t ReadMonotonicClockNanos() {
  struct timespec t;
#ifdef CLOCK_MONOTONIC_RAW
  int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
#else
  int rc = clock_gettime(CLOCK_MONOTONIC, &t);
#endif
  if (rc != 0) {
    ABSL_INTERNAL_LOG(
        FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")");
  }
  return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec;
}

class UnscaledCycleClockWrapperForInitializeFrequency {
 public:
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};

struct TimeTscPair {
  int64_t time;  // From ReadMonotonicClockNanos().
  int64_t tsc;   // From UnscaledCycleClock::Now().
};

// Returns a pair of values (monotonic kernel time, TSC ticks) that
// approximately correspond to each other. This is accomplished by
// doing several reads and picking the reading with the lowest
// latency. This approach is used to minimize the probability that
// our thread was preempted between clock reads.
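// Because the TSC read is bracketed by the two clock reads, the pairing
// error is bounded by the observed latency, so the lowest-latency sample
// is the most trustworthy one.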
static TimeTscPair GetTimeTscPair() {
  int64_t best_latency = std::numeric_limits<int64_t>::max();
  TimeTscPair best;
  for (int i = 0; i < 10; ++i) {
    int64_t t0 = ReadMonotonicClockNanos();
    int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now();
    int64_t t1 = ReadMonotonicClockNanos();
    int64_t latency = t1 - t0;
    if (latency < best_latency) {
      best_latency = latency;
      best.time = t0;
      best.tsc = tsc;
    }
  }
  return best;
}

// Measures and returns the TSC frequency by taking a pair of
// measurements approximately `sleep_nanoseconds` apart.
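// For example, if 3,000,000 ticks elapse across 1 ms of wall time, the
// estimate is 3e6 / 1e-3 = 3e9 ticks per second (a 3 GHz TSC).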
static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) {
  auto t0 = GetTimeTscPair();
  struct timespec ts;
  ts.tv_sec = 0;
  ts.tv_nsec = sleep_nanoseconds;
  while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {}
  auto t1 = GetTimeTscPair();
  double elapsed_ticks = t1.tsc - t0.tsc;
  double elapsed_time = (t1.time - t0.time) * 1e-9;
  return elapsed_ticks / elapsed_time;
}

// Measures and returns the TSC frequency by calling
// MeasureTscFrequencyWithSleep(), doubling the sleep interval until the
// frequency measurement stabilizes.
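// Starting at 1 ms and doubling eight times bounds the total sleep time at
// (2^8 - 1) ms = 255 ms in the worst case.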
static double MeasureTscFrequency() {
  double last_measurement = -1.0;
  int sleep_nanoseconds = 1000000;  // 1 millisecond.
  for (int i = 0; i < 8; ++i) {
    double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds);
    if (measurement * 0.99 < last_measurement &&
        last_measurement < measurement * 1.01) {
      // Use the current measurement if it is within 1% of the
      // previous measurement.
      return measurement;
    }
    last_measurement = measurement;
    sleep_nanoseconds *= 2;
  }
  return last_measurement;
}

#endif  // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY

static double GetNominalCPUFrequency() {
  long freq = 0;

  // Google's production kernel has a patch to export the TSC
  // frequency through sysfs. If the kernel is exporting the TSC
  // frequency, use that. There are issues where cpuinfo_max_freq
  // cannot be relied on because the BIOS may be exporting an invalid
  // p-state (on x86) or p-states may be used to put the processor in
  // a new mode (turbo mode). Essentially, those frequencies cannot
  // always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  // On these platforms, the TSC frequency is the nominal CPU
  // frequency. But without having the kernel export it directly
  // through /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  // other way to reliably get the TSC frequency, so we have to
  // measure it ourselves. Some CPUs abuse cpuinfo_max_freq by
  // exporting "fake" frequencies for implementing new features. For
  // example, Intel's turbo mode is enabled by exposing a p-state
  // value with a higher frequency than that of the real TSC
  // rate. Because of this, we prefer to measure the TSC rate
  // ourselves on i386 and x86-64.
  return MeasureTscFrequency();
#else

  // If CPU scaling is in effect, we want to use the *maximum*
  // frequency, not whatever CPU speed some random processor happens
  // to be using now.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

  return 1.0;
#endif  // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
}

#endif

ABSL_CONST_INIT static once_flag init_num_cpus_once;
ABSL_CONST_INIT static int num_cpus = 0;

// NumCPUs() may be called before main() and before malloc is properly
// initialized, therefore this must not allocate memory.
int NumCPUs() {
  base_internal::LowLevelCallOnce(
      &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  return num_cpus;
}

// A default frequency of 0.0 might be dangerous if it is used in division.
ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;

// NominalCPUFrequency() may be called before main() and before malloc is
// properly initialized, therefore this must not allocate memory.
double NominalCPUFrequency() {
  base_internal::LowLevelCallOnce(
      &init_nominal_cpu_frequency_once,
      []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  return nominal_cpu_frequency;
}

#if defined(_WIN32)

pid_t GetTID() {
  return pid_t{GetCurrentThreadId()};
}

#elif defined(__linux__)

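// Some older toolchains' <sys/syscall.h> define the gettid syscall number
// only as __NR_gettid, so define SYS_gettid ourselves when it is missing.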
#ifndef SYS_gettid
#define SYS_gettid __NR_gettid
#endif

pid_t GetTID() {
  return static_cast<pid_t>(syscall(SYS_gettid));
}

#elif defined(__akaros__)

pid_t GetTID() {
  // Akaros has a concept of "vcore context", which is the state the program
  // is forced into when we need to make a user-level scheduling decision, or
  // run a signal handler. This is analogous to the interrupt context that a
  // CPU might enter if it encounters some kind of exception.
  //
  // There is no current thread context in vcore context, but we need to give
  // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  // Thread 0 always exists, so if we are in vcore context, we return that.
  //
  // Otherwise, we know (since we are using pthreads) that the uthread struct
  // current_uthread is pointing to is the first element of a
  // struct pthread_tcb, so we extract and return the thread ID from that.
  //
  // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  // structure at some point. We should modify this code to remove the cast
  // when that happens.
  if (in_vcore_context())
    return 0;
  return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
}

#elif defined(__myriad2__)

pid_t GetTID() {
  uint32_t tid;
  rtems_task_ident(RTEMS_SELF, 0, &tid);
  return static_cast<pid_t>(tid);
}

#elif defined(__APPLE__)

pid_t GetTID() {
  uint64_t tid;
  // `nullptr` here implies this thread. This only fails if the specified
  // thread is invalid or the pointer-to-tid is null, so we needn't worry
  // about it.
  pthread_threadid_np(nullptr, &tid);
  return static_cast<pid_t>(tid);
}

#elif defined(__FreeBSD__)

pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }

#elif defined(__OpenBSD__)

pid_t GetTID() { return getthrid(); }

#elif defined(__NetBSD__)

pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }

#elif defined(__native_client__)

pid_t GetTID() {
  auto* thread = pthread_self();
  static_assert(sizeof(pid_t) == sizeof(thread),
                "In NaCl, pid_t is expected to be the same size as a pointer");
  return reinterpret_cast<pid_t>(thread);
}

#else

// Fallback implementation of `GetTID` using `pthread_self`.
pid_t GetTID() {
  // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't
  // should be handled above.
  return static_cast<pid_t>(pthread_self());
}

#endif

// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls. Without this
// caching, a call can take roughly 98 ns; with it, roughly 1 ns.
pid_t GetCachedTID() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  static thread_local pid_t thread_id = GetTID();
  return thread_id;
#else
  return GetTID();
#endif  // ABSL_HAVE_THREAD_LOCAL
}

}  // namespace base_internal
ABSL_NAMESPACE_END
}  // namespace absl