1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
9 #define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
10
11 /* cxa_guard_impl.h - Implements the C++ runtime support for function local
12 * static guards.
13 * The layout of the guard object is the same across ARM and Itanium.
14 *
 * The first "guard byte" (which is checked by the compiler) is set only upon
 * the completion of __cxa_guard_release.
17 *
18 * The second "init byte" does the rest of the bookkeeping. It tracks if
19 * initialization is complete or pending, and if there are waiting threads.
20 *
 * If the guard variable is 64-bits and the platform supplies a 32-bit thread
22 * identifier, it is used to detect recursive initialization. The thread ID of
23 * the thread currently performing initialization is stored in the second word.
24 *
25 * Guard Object Layout:
26 * -------------------------------------------------------------------------
27 * |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
28 * ------------------------------------------------------------------------
29 *
30 * Access Protocol:
31 * For each implementation the guard byte is checked and set before accessing
32 * the init byte.
33 *
34 * Overall Design:
35 * The implementation was designed to allow each implementation to be tested
36 * independent of the C++ runtime or platform support.
37 *
38 */
39
40 #include "__cxxabi_config.h"
41 #include "include/atomic_support.h"
42 #include <unistd.h>
43 #include <sys/types.h>
44 // Android Trusty: sys/syscall.h tries to include bits/syscall.h, which is
45 // missing. Trusty seems to define _LIBCXXABI_HAS_NO_THREADS, and gettid isn't
46 // needed in that case, so skip sys/syscall.h.
47 #if defined(__has_include) && !defined(_LIBCXXABI_HAS_NO_THREADS)
48 # if __has_include(<sys/syscall.h>)
49 # include <sys/syscall.h>
50 # endif
51 #endif
52
#include <cstdint>
#include <cstring>
#include <limits.h>
#include <stdlib.h>
#include <__threading_support>
55
56 // To make testing possible, this header is included from both cxa_guard.cpp
57 // and a number of tests.
58 //
59 // For this reason we place everything in an anonymous namespace -- even though
60 // we're in a header. We want the actual implementation and the tests to have
61 // unique definitions of the types in this header (since the tests may depend
62 // on function local statics).
63 //
64 // To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
65 // defined when including this file. Only `src/cxa_guard.cpp` should define
66 // the former.
67 #ifdef BUILDING_CXA_GUARD
68 # include "abort_message.h"
69 # define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
70 #elif defined(TESTING_CXA_GUARD)
71 # define ABORT_WITH_MESSAGE(...) ::abort()
72 #else
73 # error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
74 #endif
75
// ThreadSanitizer cannot see the synchronization performed by raw futex
// syscalls, so the futex implementation annotates them explicitly with
// acquire/release events. When TSAN is disabled the annotations compile
// away to no-ops.
#if __has_feature(thread_sanitizer)
extern "C" void __tsan_acquire(void*);
extern "C" void __tsan_release(void*);
#else
#define __tsan_acquire(addr) ((void)0)
#define __tsan_release(addr) ((void)0)
#endif
83
84 namespace __cxxabiv1 {
85 // Use an anonymous namespace to ensure that the tests and actual implementation
86 // have unique definitions of these symbols.
87 namespace {
88
89 //===----------------------------------------------------------------------===//
90 // Misc Utilities
91 //===----------------------------------------------------------------------===//
92
/// LazyValue - A value of type `T` computed on first access by calling `Init`
/// and cached for all subsequent accesses. Used to defer fetching the current
/// thread ID until recursive-initialization detection actually needs it.
///
/// NOTE(review): not synchronized; each instance appears to be used by a
/// single thread (it lives in a per-call guard object) -- confirm if reused.
template <class T, T (*Init)()>
struct LazyValue {
  // `is_init` already has a default member initializer; the original
  // constructor redundantly re-initialized it in the mem-initializer list.
  LazyValue() = default;

  /// Return the cached value, computing it via `Init()` on the first call.
  T& get() {
    if (!is_init) {
      value = Init();
      is_init = true;
    }
    return value;
  }

private:
  T value;              // Meaningful only once `is_init` is true.
  bool is_init = false; // Has `value` been computed yet?
};
108
109 //===----------------------------------------------------------------------===//
110 // PlatformGetThreadID
111 //===----------------------------------------------------------------------===//
112
#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
// Apple: use the Mach port of the current pthread as the 32-bit thread ID.
uint32_t PlatformThreadID() {
  static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(
      pthread_mach_thread_np(std::__libcpp_thread_get_current_id()));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \
    !defined(__BIONIC__)
// Linux: use the kernel thread ID via the gettid syscall.
// Bionic: Disable the SYS_gettid feature for now. Some processes on Android
// block SYS_gettid using seccomp.
uint32_t PlatformThreadID() {
  static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
  return static_cast<uint32_t>(syscall(SYS_gettid));
}
#else
// No usable 32-bit thread identifier on this platform. A null function
// pointer disables recursive-initialization detection (see
// PlatformSupportsThreadID below).
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
130
131
PlatformSupportsThreadID()132 constexpr bool PlatformSupportsThreadID() {
133 #ifdef __clang__
134 #pragma clang diagnostic push
135 #pragma clang diagnostic ignored "-Wtautological-pointer-compare"
136 #endif
137 return +PlatformThreadID != nullptr;
138 #ifdef __clang__
139 #pragma clang diagnostic pop
140 #endif
141 }
142
143 //===----------------------------------------------------------------------===//
144 // GuardBase
145 //===----------------------------------------------------------------------===//
146
// Result of __cxa_guard_acquire: either initialization has already completed
// (the caller must not run the initializer), or it is pending and the calling
// thread now owns it (and must later call release or abort).
enum class AcquireResult {
  INIT_IS_DONE,
  INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;

// States of the "init byte". COMPLETE_BIT is terminal. PENDING_BIT marks an
// in-progress initialization. WAITING_BIT records that at least one other
// thread is blocked waiting for the initialization to finish, so the owner
// must wake waiters on release/abort.
static constexpr uint8_t UNSET = 0;
static constexpr uint8_t COMPLETE_BIT = (1 << 0);
static constexpr uint8_t PENDING_BIT = (1 << 1);
static constexpr uint8_t WAITING_BIT = (1 << 2);
158
// GuardObject - CRTP base implementing the guard-byte fast path shared by
// every implementation. The Derived class supplies the init-byte policy via
// acquire_init_byte/release_init_byte/abort_init_byte.
template <class Derived>
struct GuardObject {
  GuardObject() = delete;
  GuardObject(GuardObject const&) = delete;
  GuardObject& operator=(GuardObject const&) = delete;

  // 32-bit guard (ARM layout): no room for a thread ID word.
  explicit GuardObject(uint32_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(nullptr) {}

  // 64-bit guard (Itanium layout): the second 32-bit word stores the
  // initializing thread's ID.
  explicit GuardObject(uint64_t* g)
      : base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
        init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
        thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}

 public:
  /// Implements __cxa_guard_acquire.
  AcquireResult cxa_guard_acquire() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    // Fast path: the guard byte is only ever set after a successful
    // initialization (see cxa_guard_release), so any non-UNSET value means
    // initialization is complete.
    if (guard_byte.load(std::_AO_Acquire) != UNSET)
      return INIT_IS_DONE;
    return derived()->acquire_init_byte();
  }

  /// Implements __cxa_guard_release.
  void cxa_guard_release() {
    AtomicInt<uint8_t> guard_byte(guard_byte_address);
    // Store complete first, so that when release wakes other folks, they see
    // it as having been completed.
    guard_byte.store(COMPLETE_BIT, std::_AO_Release);
    derived()->release_init_byte();
  }

  /// Implements __cxa_guard_abort.
  void cxa_guard_abort() { derived()->abort_init_byte(); }

 public:
  /// base_address - the address of the original guard object.
  void* const base_address;
  /// The address of the guard byte at offset 0.
  uint8_t* const guard_byte_address;
  /// The address of the byte used by the implementation during initialization.
  uint8_t* const init_byte_address;
  /// An optional address storing an identifier for the thread performing initialization.
  /// It's used to detect recursive initialization.
  uint32_t* const thread_id_address;

 private:
  Derived* derived() { return static_cast<Derived*>(this); }
};
210
211 //===----------------------------------------------------------------------===//
212 // Single Threaded Implementation
213 //===----------------------------------------------------------------------===//
214
215 struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
216 using GuardObject::GuardObject;
217
acquire_init_byteInitByteNoThreads218 AcquireResult acquire_init_byte() {
219 if (*init_byte_address == COMPLETE_BIT)
220 return INIT_IS_DONE;
221 if (*init_byte_address & PENDING_BIT)
222 ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
223 *init_byte_address = PENDING_BIT;
224 return INIT_IS_PENDING;
225 }
226
release_init_byteInitByteNoThreads227 void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
abort_init_byteInitByteNoThreads228 void abort_init_byte() { *init_byte_address = UNSET; }
229 };
230
231
232 //===----------------------------------------------------------------------===//
233 // Global Mutex Implementation
234 //===----------------------------------------------------------------------===//
235
// Forward declarations: these names are referenced as template arguments
// even when threading is disabled, in which case the empty definitions in
// the #else branch below are used.
struct LibcppMutex;
struct LibcppCondVar;

#ifndef _LIBCXXABI_HAS_NO_THREADS
// Non-copyable wrapper over the libc++ mutex primitive. lock()/unlock()
// return true ON FAILURE (callers such as LockGuard abort when they see a
// true return).
struct LibcppMutex {
  LibcppMutex() = default;
  LibcppMutex(LibcppMutex const&) = delete;
  LibcppMutex& operator=(LibcppMutex const&) = delete;

  bool lock() { return std::__libcpp_mutex_lock(&mutex); }
  bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }

private:
  friend struct LibcppCondVar; // wait() needs the raw mutex handle.
  std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
};

// Non-copyable wrapper over the libc++ condition variable. As above,
// wait()/broadcast() return true on failure.
struct LibcppCondVar {
  LibcppCondVar() = default;
  LibcppCondVar(LibcppCondVar const&) = delete;
  LibcppCondVar& operator=(LibcppCondVar const&) = delete;

  bool wait(LibcppMutex& mut) {
    return std::__libcpp_condvar_wait(&cond, &mut.mutex);
  }
  bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }

private:
  std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
};
#else
// Threading disabled: empty placeholders so templates taking a Mutex and
// CondVar still instantiate.
struct LibcppMutex {};
struct LibcppCondVar {};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)
270
271
272 template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
273 uint32_t (*GetThreadID)() = PlatformThreadID>
274 struct InitByteGlobalMutex
275 : GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
276 GetThreadID>> {
277
278 using BaseT = typename InitByteGlobalMutex::GuardObject;
279 using BaseT::BaseT;
280
InitByteGlobalMutexInitByteGlobalMutex281 explicit InitByteGlobalMutex(uint32_t *g)
282 : BaseT(g), has_thread_id_support(false) {}
InitByteGlobalMutexInitByteGlobalMutex283 explicit InitByteGlobalMutex(uint64_t *g)
284 : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {}
285
286 public:
acquire_init_byteInitByteGlobalMutex287 AcquireResult acquire_init_byte() {
288 LockGuard g("__cxa_guard_acquire");
289 // Check for possible recursive initialization.
290 if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
291 if (*thread_id_address == current_thread_id.get())
292 ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
293 }
294
295 // Wait until the pending bit is not set.
296 while (*init_byte_address & PENDING_BIT) {
297 *init_byte_address |= WAITING_BIT;
298 global_cond.wait(global_mutex);
299 }
300
301 if (*init_byte_address == COMPLETE_BIT)
302 return INIT_IS_DONE;
303
304 if (has_thread_id_support)
305 *thread_id_address = current_thread_id.get();
306
307 *init_byte_address = PENDING_BIT;
308 return INIT_IS_PENDING;
309 }
310
release_init_byteInitByteGlobalMutex311 void release_init_byte() {
312 bool has_waiting;
313 {
314 LockGuard g("__cxa_guard_release");
315 has_waiting = *init_byte_address & WAITING_BIT;
316 *init_byte_address = COMPLETE_BIT;
317 }
318 if (has_waiting) {
319 if (global_cond.broadcast()) {
320 ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
321 }
322 }
323 }
324
abort_init_byteInitByteGlobalMutex325 void abort_init_byte() {
326 bool has_waiting;
327 {
328 LockGuard g("__cxa_guard_abort");
329 if (has_thread_id_support)
330 *thread_id_address = 0;
331 has_waiting = *init_byte_address & WAITING_BIT;
332 *init_byte_address = UNSET;
333 }
334 if (has_waiting) {
335 if (global_cond.broadcast()) {
336 ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
337 }
338 }
339 }
340
341 private:
342 using BaseT::init_byte_address;
343 using BaseT::thread_id_address;
344 const bool has_thread_id_support;
345 LazyValue<uint32_t, GetThreadID> current_thread_id;
346
347 private:
348 struct LockGuard {
349 LockGuard() = delete;
350 LockGuard(LockGuard const&) = delete;
351 LockGuard& operator=(LockGuard const&) = delete;
352
LockGuardInitByteGlobalMutex::LockGuard353 explicit LockGuard(const char* calling_func)
354 : calling_func(calling_func) {
355 if (global_mutex.lock())
356 ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func);
357 }
358
~LockGuardInitByteGlobalMutex::LockGuard359 ~LockGuard() {
360 if (global_mutex.unlock())
361 ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func);
362 }
363
364 private:
365 const char* const calling_func;
366 };
367 };
368
369 //===----------------------------------------------------------------------===//
370 // Futex Implementation
371 //===----------------------------------------------------------------------===//
372
#if defined(SYS_futex)
// Block until *addr no longer contains `expect` (or a spurious wakeup
// occurs). Callers re-check the guard state in a loop, so spurious returns
// are harmless.
void PlatformFutexWait(int* addr, int expect) {
  constexpr int WAIT = 0; // FUTEX_WAIT
  syscall(SYS_futex, addr, WAIT, expect, 0);
  __tsan_acquire(addr);
}
// Wake every thread blocked in PlatformFutexWait on `addr`.
void PlatformFutexWake(int* addr) {
  constexpr int WAKE = 1; // FUTEX_WAKE; INT_MAX = wake all waiters.
  __tsan_release(addr);
  syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
// No futex syscall on this platform; null pointers mark the futex
// implementation unusable (see PlatformSupportsFutex and the static_assert
// guarding implementation selection).
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif
388
PlatformSupportsFutex()389 constexpr bool PlatformSupportsFutex() {
390 #ifdef __clang__
391 #pragma clang diagnostic push
392 #pragma clang diagnostic ignored "-Wtautological-pointer-compare"
393 #endif
394 return +PlatformFutexWait != nullptr;
395 #ifdef __clang__
396 #pragma clang diagnostic pop
397 #endif
398 }
399
/// InitByteFutex - Manages initialization using atomics and the futex syscall
/// for waiting and waking. The init byte is claimed with a lock-free CAS;
/// waiters block in the kernel on the guard's address.
template <void (*Wait)(int*, int) = PlatformFutexWait,
          void (*Wake)(int*) = PlatformFutexWake,
          uint32_t (*GetThreadIDArg)() = PlatformThreadID>
struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
  using BaseT = typename InitByteFutex::GuardObject;

  /// ARM Constructor (32-bit guard: no thread-id word, so recursion
  /// detection is disabled).
  explicit InitByteFutex(uint32_t *g) : BaseT(g),
    init_byte(this->init_byte_address),
    has_thread_id_support(this->thread_id_address && GetThreadIDArg),
    thread_id(this->thread_id_address) {}

  /// Itanium Constructor (64-bit guard: second word stores the thread id).
  explicit InitByteFutex(uint64_t *g) : BaseT(g),
    init_byte(this->init_byte_address),
    has_thread_id_support(this->thread_id_address && GetThreadIDArg),
    thread_id(this->thread_id_address) {}

public:
  /// Try to CAS the init byte UNSET -> PENDING_BIT to claim the
  /// initialization; otherwise publish the waiting bit and sleep on the
  /// futex until the owner releases or aborts. Aborts the process on
  /// detected recursive initialization.
  AcquireResult acquire_init_byte() {
    while (true) {
      uint8_t last_val = UNSET;
      if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
                                     std::_AO_Acquire)) {
        if (has_thread_id_support) {
          // Record our ID so a re-entrant acquire from this thread aborts.
          thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
        }
        return INIT_IS_PENDING;
      }

      if (last_val == COMPLETE_BIT)
        return INIT_IS_DONE;

      if (last_val & PENDING_BIT) {

        // Check for recursive initialization
        if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
            ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
        }

        if ((last_val & WAITING_BIT) == 0) {
          // This compare exchange can fail for several reasons
          // (1) another thread finished the whole thing before we got here
          // (2) another thread set the waiting bit we were trying to set
          // (3) another thread had an exception and failed to finish
          // NOTE(review): _AO_Release as a CAS *failure* ordering is unusual
          // (failure orderings are normally relaxed/acquire); confirm that
          // atomic_support permits it.
          if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
                                          std::_AO_Acq_Rel, std::_AO_Release)) {
            // (1) success, via someone else's work!
            if (last_val == COMPLETE_BIT)
              return INIT_IS_DONE;

            // (3) someone else, bailed on doing the work, retry from the start!
            if (last_val == UNSET)
              continue;

            // (2) the waiting bit got set, so we are happy to keep waiting
          }
        }
        wait_on_initialization();
      }
    }
  }

  /// Publish completion; wake all waiters if any thread set the waiting bit.
  void release_init_byte() {
    uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
    if (old & WAITING_BIT)
      wake_all();
  }

  /// The initializer threw: clear the thread id and reset the byte to UNSET
  /// so a waiter can retry, then wake all waiters.
  void abort_init_byte() {
    if (has_thread_id_support)
      thread_id.store(0, std::_AO_Relaxed);

    uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
    if (old & WAITING_BIT)
      wake_all();
  }

private:
  /// Use the futex to wait on the current guard variable. Futex expects a
  /// 32-bit 4-byte aligned address as the first argument, so we have to use
  /// the base address of the guard variable (not the init byte).
  void wait_on_initialization() {
    Wait(static_cast<int*>(this->base_address),
         expected_value_for_futex(PENDING_BIT | WAITING_BIT));
  }
  void wake_all() { Wake(static_cast<int*>(this->base_address)); }

private:
  AtomicInt<uint8_t> init_byte;

  const bool has_thread_id_support;
  // Unsafe to use unless has_thread_id_support
  AtomicInt<uint32_t> thread_id;
  LazyValue<uint32_t, GetThreadIDArg> current_thread_id;

  /// Create the expected integer value for futex `wait(int* addr, int expected)`.
  /// We pass the base address as the first argument, so this function creates
  /// a zero-initialized integer with `b` copied at offset 1 -- the init
  /// byte's position within the guard object's memory.
  static int expected_value_for_futex(uint8_t b) {
    int dest_val = 0;
    std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
    return dest_val;
  }

  // This type is only instantiable with real wait/wake functions.
  static_assert(Wait != nullptr && Wake != nullptr, "");
};
509
510 //===----------------------------------------------------------------------===//
511 //
512 //===----------------------------------------------------------------------===//
513
// GlobalStatic - Provides a single program-wide instance of T, used to
// create the global mutex and condition variable. _LIBCPP_SAFE_STATIC marks
// the definition as intended to be constant-initialized (no dynamic
// initialization ordering hazards).
template <class T>
struct GlobalStatic {
  static T instance;
};
template <class T>
_LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};
520
// The three available implementations, one of which is selected at compile
// time below (CurrentImplementation).
enum class Implementation {
  NoThreads,
  GlobalLock,
  Futex
};

// Maps an Implementation enumerator to its concrete guard type.
template <Implementation Impl>
struct SelectImplementation;

template <>
struct SelectImplementation<Implementation::NoThreads> {
  using type = InitByteNoThreads;
};

template <>
struct SelectImplementation<Implementation::GlobalLock> {
  using type = InitByteGlobalMutex<
      LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
      GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
};

template <>
struct SelectImplementation<Implementation::Futex> {
  using type =
      InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
};
547
// TODO(EricWF): We should prefer the futex implementation when available. But
// it should be done in a separate step from adding the implementation.
constexpr Implementation CurrentImplementation =
#if defined(_LIBCXXABI_HAS_NO_THREADS)
    Implementation::NoThreads;
#elif defined(_LIBCXXABI_USE_FUTEX)
    Implementation::Futex;
#else
    Implementation::GlobalLock;
#endif

// Reject configurations that request the futex implementation on a platform
// without the futex syscall.
static_assert(CurrentImplementation != Implementation::Futex
              || PlatformSupportsFutex(), "Futex selected but not supported");

// The implementation type cxa_guard.cpp (and the tests) compile against.
using SelectedImplementation =
    SelectImplementation<CurrentImplementation>::type;
564
565 } // end namespace
566 } // end namespace __cxxabiv1
567
568 #endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
569