xref: /aosp_15_r20/external/cronet/third_party/abseil-cpp/absl/base/internal/sysinfo.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/base/internal/sysinfo.h"
16 
17 #include "absl/base/attributes.h"
18 
19 #ifdef _WIN32
20 #include <windows.h>
21 #else
22 #include <fcntl.h>
23 #include <pthread.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 #endif
28 
29 #ifdef __linux__
30 #include <sys/syscall.h>
31 #endif
32 
33 #if defined(__APPLE__) || defined(__FreeBSD__)
34 #include <sys/sysctl.h>
35 #endif
36 
37 #ifdef __FreeBSD__
38 #include <pthread_np.h>
39 #endif
40 
41 #ifdef __NetBSD__
42 #include <lwp.h>
43 #endif
44 
45 #if defined(__myriad2__)
46 #include <rtems.h>
47 #endif
48 
49 #include <string.h>
50 
51 #include <cassert>
52 #include <cerrno>
53 #include <cstdint>
54 #include <cstdio>
55 #include <cstdlib>
56 #include <ctime>
57 #include <limits>
58 #include <thread>  // NOLINT(build/c++11)
59 #include <utility>
60 #include <vector>
61 
62 #include "absl/base/call_once.h"
63 #include "absl/base/config.h"
64 #include "absl/base/internal/raw_logging.h"
65 #include "absl/base/internal/spinlock.h"
66 #include "absl/base/internal/unscaledcycleclock.h"
67 #include "absl/base/thread_annotations.h"
68 
69 namespace absl {
70 ABSL_NAMESPACE_BEGIN
71 namespace base_internal {
72 
73 namespace {
74 
75 #if defined(_WIN32)
76 
77 // Returns number of bits set in `bitMask`
Win32CountSetBits(ULONG_PTR bitMask)78 DWORD Win32CountSetBits(ULONG_PTR bitMask) {
79   for (DWORD bitSetCount = 0; ; ++bitSetCount) {
80     if (bitMask == 0) return bitSetCount;
81     bitMask &= bitMask - 1;
82   }
83 }
84 
// Returns the number of logical CPUs using GetLogicalProcessorInformation(), or
// 0 if the number of processors is not available or can not be computed.
// https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation
int Win32NumCPUs() {
#pragma comment(lib, "kernel32.lib")
  using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;

  DWORD info_size = sizeof(Info);
  Info* info(static_cast<Info*>(malloc(info_size)));
  if (info == nullptr) return 0;

  bool success = GetLogicalProcessorInformation(info, &info_size);
  if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
    // The initial one-entry buffer was too small; `info_size` now holds the
    // required size, so retry exactly once with a correctly sized allocation.
    free(info);
    info = static_cast<Info*>(malloc(info_size));
    if (info == nullptr) return 0;
    success = GetLogicalProcessorInformation(info, &info_size);
  }

  DWORD logicalProcessorCount = 0;
  if (success) {
    Info* ptr = info;
    DWORD byteOffset = 0;
    // Walk the returned array of fixed-size entries; each
    // RelationProcessorCore entry carries a bitmask with one bit per logical
    // processor sharing that core.
    while (byteOffset + sizeof(Info) <= info_size) {
      switch (ptr->Relationship) {
        case RelationProcessorCore:
          logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask);
          break;

        case RelationNumaNode:
        case RelationCache:
        case RelationProcessorPackage:
          // Ignore other entries
          break;

        default:
          // Ignore unknown entries
          break;
      }
      byteOffset += sizeof(Info);
      ptr++;
    }
  }
  free(info);
  return static_cast<int>(logicalProcessorCount);
}
131 
132 #endif
133 
134 }  // namespace
135 
// Returns the number of logical CPUs, computed with a per-platform strategy
// selected below.
static int GetNumCPUs() {
#if defined(__myriad2__)
  // Myriad2 (RTEMS): hard-coded to a single CPU.
  return 1;
#elif defined(_WIN32)
  const int hardware_concurrency = Win32NumCPUs();
  // Win32NumCPUs() returns 0 when the count is unavailable; fall back to 1.
  return hardware_concurrency ? hardware_concurrency : 1;
#elif defined(_AIX)
  return sysconf(_SC_NPROCESSORS_ONLN);
#else
  // Other possibilities:
  //  - Read /sys/devices/system/cpu/online and use cpumask_parse()
  //  - sysconf(_SC_NPROCESSORS_ONLN)
  // NOTE(review): std::thread::hardware_concurrency() may return 0 when the
  // count is not computable; that value is passed through unmodified here.
  return static_cast<int>(std::thread::hardware_concurrency());
#endif
}
151 
152 #if defined(_WIN32)
153 
// Returns the nominal CPU frequency in Hz, read from the "~MHz" registry
// value for processor 0, or 1.0 if it cannot be determined.
static double GetNominalCPUFrequency() {
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
    !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
  // UWP apps don't have access to the registry and currently don't provide an
  // API informing about CPU nominal frequency.
  return 1.0;
#else
#pragma comment(lib, "advapi32.lib")  // For Reg* functions.
  HKEY key;
  // Use the Reg* functions rather than the SH functions because shlwapi.dll
  // pulls in gdi32.dll which makes process destruction much more costly.
  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
                    "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0,
                    KEY_READ, &key) == ERROR_SUCCESS) {
    DWORD type = 0;
    DWORD data = 0;
    DWORD data_size = sizeof(data);
    auto result = RegQueryValueExA(key, "~MHz", nullptr, &type,
                                   reinterpret_cast<LPBYTE>(&data), &data_size);
    RegCloseKey(key);
    // Accept the value only if the query succeeded and returned a DWORD of
    // the expected size.
    if (result == ERROR_SUCCESS && type == REG_DWORD &&
        data_size == sizeof(data)) {
      return data * 1e6;  // Value is MHz.
    }
  }
  // Non-zero fallback so callers may safely divide by the result.
  return 1.0;
#endif  // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP
}
182 
183 #elif defined(CTL_HW) && defined(HW_CPU_FREQ)
184 
// Returns the CPU frequency in Hz as reported by the {CTL_HW, HW_CPU_FREQ}
// sysctl (Apple/FreeBSD), or 1.0 if the query fails.
static double GetNominalCPUFrequency() {
  unsigned freq;
  size_t size = sizeof(freq);
  int mib[2] = {CTL_HW, HW_CPU_FREQ};
  if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) {
    return static_cast<double>(freq);
  }
  // Non-zero fallback, consistent with the other platform implementations.
  return 1.0;
}
194 
195 #else
196 
// Helper function for reading a long from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
static bool ReadLongFromFile(const char *file, long *value) {
#if defined(_POSIX_C_SOURCE)
  const int file_mode = (O_RDONLY | O_CLOEXEC);
#else
  const int file_mode = O_RDONLY;
#endif

  const int fd = open(file, file_mode);
  if (fd == -1) return false;

  // Zero-filled buffer with one spare byte guarantees NUL termination no
  // matter how many bytes read() returns.
  char line[1024];
  memset(line, '\0', sizeof(line));
  ssize_t len;
  do {
    len = read(fd, line, sizeof(line) - 1);
  } while (len < 0 && errno == EINTR);

  bool parsed = false;
  if (len > 0) {
    char *end;
    const long candidate = strtol(line, &end, 10);
    // Accept only a non-empty buffer whose parsed number is terminated by
    // end-of-string or a newline.
    if (line[0] != '\0' && (*end == '\n' || *end == '\0')) {
      *value = candidate;
      parsed = true;
    }
  }
  close(fd);
  return parsed;
}
229 
230 #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
231 
232 // Reads a monotonic time source and returns a value in
233 // nanoseconds. The returned value uses an arbitrary epoch, not the
234 // Unix epoch.
ReadMonotonicClockNanos()235 static int64_t ReadMonotonicClockNanos() {
236   struct timespec t;
237 #ifdef CLOCK_MONOTONIC_RAW
238   int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
239 #else
240   int rc = clock_gettime(CLOCK_MONOTONIC, &t);
241 #endif
242   if (rc != 0) {
243     ABSL_INTERNAL_LOG(
244         FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")");
245   }
246   return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec;
247 }
248 
// Thin forwarder to base_internal::UnscaledCycleClock::Now(), used by the
// TSC-frequency measurement code below.
class UnscaledCycleClockWrapperForInitializeFrequency {
 public:
  // Returns the current unscaled cycle-clock (TSC) reading.
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};
253 
// A matched pair of readings — monotonic clock time and TSC ticks — taken as
// close together in time as possible (see GetTimeTscPair()).
struct TimeTscPair {
  int64_t time;  // From ReadMonotonicClockNanos().
  int64_t tsc;   // From UnscaledCycleClock::Now().
};
258 
259 // Returns a pair of values (monotonic kernel time, TSC ticks) that
260 // approximately correspond to each other.  This is accomplished by
261 // doing several reads and picking the reading with the lowest
262 // latency.  This approach is used to minimize the probability that
263 // our thread was preempted between clock reads.
GetTimeTscPair()264 static TimeTscPair GetTimeTscPair() {
265   int64_t best_latency = std::numeric_limits<int64_t>::max();
266   TimeTscPair best;
267   for (int i = 0; i < 10; ++i) {
268     int64_t t0 = ReadMonotonicClockNanos();
269     int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now();
270     int64_t t1 = ReadMonotonicClockNanos();
271     int64_t latency = t1 - t0;
272     if (latency < best_latency) {
273       best_latency = latency;
274       best.time = t0;
275       best.tsc = tsc;
276     }
277   }
278   return best;
279 }
280 
281 // Measures and returns the TSC frequency by taking a pair of
282 // measurements approximately `sleep_nanoseconds` apart.
MeasureTscFrequencyWithSleep(int sleep_nanoseconds)283 static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) {
284   auto t0 = GetTimeTscPair();
285   struct timespec ts;
286   ts.tv_sec = 0;
287   ts.tv_nsec = sleep_nanoseconds;
288   while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {}
289   auto t1 = GetTimeTscPair();
290   double elapsed_ticks = t1.tsc - t0.tsc;
291   double elapsed_time = (t1.time - t0.time) * 1e-9;
292   return elapsed_ticks / elapsed_time;
293 }
294 
295 // Measures and returns the TSC frequency by calling
296 // MeasureTscFrequencyWithSleep(), doubling the sleep interval until the
297 // frequency measurement stabilizes.
MeasureTscFrequency()298 static double MeasureTscFrequency() {
299   double last_measurement = -1.0;
300   int sleep_nanoseconds = 1000000;  // 1 millisecond.
301   for (int i = 0; i < 8; ++i) {
302     double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds);
303     if (measurement * 0.99 < last_measurement &&
304         last_measurement < measurement * 1.01) {
305       // Use the current measurement if it is within 1% of the
306       // previous measurement.
307       return measurement;
308     }
309     last_measurement = measurement;
310     sleep_nanoseconds *= 2;
311   }
312   return last_measurement;
313 }
314 
315 #endif  // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
316 
// Returns the nominal CPU frequency in Hz: prefers the kernel-exported TSC
// frequency, then (per platform) either direct TSC measurement or cpufreq's
// maximum frequency.  Returns 1.0 if nothing can be determined, so callers
// may safely divide by the result.
static double GetNominalCPUFrequency() {
  long freq = 0;

  // Google's production kernel has a patch to export the TSC
  // frequency through sysfs. If the kernel is exporting the TSC
  // frequency use that. There are issues where cpuinfo_max_freq
  // cannot be relied on because the BIOS may be exporting an invalid
  // p-state (on x86) or p-states may be used to put the processor in
  // a new mode (turbo mode). Essentially, those frequencies cannot
  // always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

#if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  // On these platforms, the TSC frequency is the nominal CPU
  // frequency.  But without having the kernel export it directly
  // though /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  // other way to reliably get the TSC frequency, so we have to
  // measure it ourselves.  Some CPUs abuse cpuinfo_max_freq by
  // exporting "fake" frequencies for implementing new features. For
  // example, Intel's turbo mode is enabled by exposing a p-state
  // value with a higher frequency than that of the real TSC
  // rate. Because of this, we prefer to measure the TSC rate
  // ourselves on i386 and x86-64.
  return MeasureTscFrequency();
#else

  // If CPU scaling is in effect, we want to use the *maximum*
  // frequency, not whatever CPU speed some random processor happens
  // to be using now.
  if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
    return freq * 1e3;  // Value is kHz.
  }

  // Nothing worked: fall back to a safe non-zero default.
  return 1.0;
#endif  // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
}
357 
358 #endif
359 
// Once-guard and cached CPU count; constant-initialized so they are safe to
// touch before main().
ABSL_CONST_INIT static once_flag init_num_cpus_once;
ABSL_CONST_INIT static int num_cpus = 0;

// NumCPUs() may be called before main() and before malloc is properly
// initialized, therefore this must not allocate memory.
int NumCPUs() {
  // Compute once, then serve the cached value on every later call.
  base_internal::LowLevelCallOnce(
      &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  return num_cpus;
}
370 
// A default frequency of 0.0 might be dangerous if it is used in division.
ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;

// NominalCPUFrequency() may be called before main() and before malloc is
// properly initialized, therefore this must not allocate memory.
double NominalCPUFrequency() {
  // Compute once, then serve the cached value on every later call.
  base_internal::LowLevelCallOnce(
      &init_nominal_cpu_frequency_once,
      []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  return nominal_cpu_frequency;
}
383 
#if defined(_WIN32)

// Windows: the OS-assigned identifier of the calling thread.
pid_t GetTID() {
  return pid_t{GetCurrentThreadId()};
}

#elif defined(__linux__)

#ifndef SYS_gettid
#define SYS_gettid __NR_gettid
#endif

// Linux: the kernel thread id, obtained via the gettid system call.
pid_t GetTID() {
  return static_cast<pid_t>(syscall(SYS_gettid));
}

#elif defined(__akaros__)

pid_t GetTID() {
  // Akaros has a concept of "vcore context", which is the state the program
  // is forced into when we need to make a user-level scheduling decision, or
  // run a signal handler.  This is analogous to the interrupt context that a
  // CPU might enter if it encounters some kind of exception.
  //
  // There is no current thread context in vcore context, but we need to give
  // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  // Thread 0 always exists, so if we are in vcore context, we return that.
  //
  // Otherwise, we know (since we are using pthreads) that the uthread struct
  // current_uthread is pointing to is the first element of a
  // struct pthread_tcb, so we extract and return the thread ID from that.
  //
  // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  // structure at some point. We should modify this code to remove the cast
  // when that happens.
  if (in_vcore_context())
    return 0;
  return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
}

#elif defined(__myriad2__)

// RTEMS (Myriad2): the task identifier of the calling task.
pid_t GetTID() {
  uint32_t tid;
  rtems_task_ident(RTEMS_SELF, 0, &tid);
  return tid;
}

#elif defined(__APPLE__)

pid_t GetTID() {
  uint64_t tid;
  // `nullptr` here implies this thread.  This only fails if the specified
  // thread is invalid or the pointer-to-tid is null, so we needn't worry about
  // it.
  pthread_threadid_np(nullptr, &tid);
  return static_cast<pid_t>(tid);
}

#elif defined(__FreeBSD__)

// FreeBSD: thread id from the pthread_np extension.
pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }

#elif defined(__OpenBSD__)

// OpenBSD: thread id from getthrid().
pid_t GetTID() { return getthrid(); }

#elif defined(__NetBSD__)

// NetBSD: light-weight process id of the calling thread.
pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }

#elif defined(__native_client__)

pid_t GetTID() {
  auto* thread = pthread_self();
  // On NaCl pthread_t is a pointer; verify it fits in a pid_t before casting.
  static_assert(sizeof(pid_t) == sizeof(thread),
                "In NaCL int expected to be the same size as a pointer");
  return reinterpret_cast<pid_t>(thread);
}

#else

// Fallback implementation of `GetTID` using `pthread_self`.
pid_t GetTID() {
  // `pthread_t` need not be arithmetic per POSIX; platforms where it isn't
  // should be handled above.
  return static_cast<pid_t>(pthread_self());
}

#endif
474 
// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls. Without this caching,
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
pid_t GetCachedTID() {
#ifdef ABSL_HAVE_THREAD_LOCAL
  // Initialized once per thread on first use; later calls read the cache.
  static thread_local pid_t thread_id = GetTID();
  return thread_id;
#else
  // No thread_local support: query the OS on every call.
  return GetTID();
#endif  // ABSL_HAVE_THREAD_LOCAL
}
486 
487 }  // namespace base_internal
488 ABSL_NAMESPACE_END
489 }  // namespace absl
490