// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef PARTITION_ALLOC_SPINNING_MUTEX_H_
#define PARTITION_ALLOC_SPINNING_MUTEX_H_

#include <algorithm>
#include <atomic>

#include "build/build_config.h"
#include "partition_alloc/partition_alloc_base/compiler_specific.h"
#include "partition_alloc/partition_alloc_base/component_export.h"
#include "partition_alloc/partition_alloc_base/thread_annotations.h"
#include "partition_alloc/partition_alloc_check.h"
#include "partition_alloc/partition_alloc_config.h"
#include "partition_alloc/yield_processor.h"

#if BUILDFLAG(IS_WIN)
#include "partition_alloc/partition_alloc_base/win/windows_types.h"
#endif

#if BUILDFLAG(IS_POSIX)
#include <pthread.h>

#include <cerrno>
#endif

#if BUILDFLAG(IS_APPLE)
#include <os/lock.h>
#endif  // BUILDFLAG(IS_APPLE)

#if BUILDFLAG(IS_FUCHSIA)
#include <lib/sync/mutex.h>
#endif

namespace partition_alloc::internal {

// The behavior of this class depends on whether PA_HAS_FAST_MUTEX is defined.
// 1. When it is defined:
//
// Simple spinning lock. It will spin in user space a set number of times
// before going into the kernel to sleep.
//
// This is intended to give "the best of both worlds" between a SpinLock and
// base::Lock:
// - SpinLock: Inlined fast path, no external function calls, just
//   compare-and-swap. Short waits do not go into the kernel. Good behavior in
//   low contention cases.
// - base::Lock: Good behavior in case of contention.
//
// We don't rely on base::Lock, which we could make spin (by calling Try() in
// a loop), as its performance is below that of a custom spinlock, as seen on
// high-level benchmarks. Instead this implements a simple non-recursive mutex
// on top of the futex() syscall on Linux, SRWLock on Windows, os_unfair_lock
// on macOS, and pthread_mutex on other POSIX systems. The main difference
// between this and a libc implementation is that it only supports the
// simplest path: private (to a process), non-recursive mutexes with no
// priority inheritance and no timed waits.
//
// As an interesting side-effect to be used in the allocator, this code does
// not make any allocations; locks are small, with a constexpr constructor and
// no destructor.
//
// 2. Otherwise: This is a simple SpinLock, in the sense that it does not have
// any awareness of other threads' behavior.
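//
// Usage sketch (illustrative only; a minimal example of the public API below,
// not code taken from the allocator):
//
//   SpinningMutex mutex;   // constexpr-constructible, no destructor.
//   mutex.Acquire();       // Spins briefly, then blocks if needed.
//   /* ... short critical section ... */
//   mutex.Release();       // Wakes a waiter if the lock was contended.
//
//   if (mutex.Try()) {     // Non-blocking attempt.
//     mutex.Release();
//   }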
class PA_LOCKABLE PA_COMPONENT_EXPORT(PARTITION_ALLOC) SpinningMutex {
 public:
  inline constexpr SpinningMutex();
  PA_ALWAYS_INLINE void Acquire() PA_EXCLUSIVE_LOCK_FUNCTION();
  PA_ALWAYS_INLINE void Release() PA_UNLOCK_FUNCTION();
  PA_ALWAYS_INLINE bool Try() PA_EXCLUSIVE_TRYLOCK_FUNCTION(true);
  void AssertAcquired() const {}  // Not supported.
  void Reinit() PA_UNLOCK_FUNCTION();

 private:
  PA_NOINLINE void AcquireSpinThenBlock() PA_EXCLUSIVE_LOCK_FUNCTION();
#if PA_CONFIG(HAS_FAST_MUTEX)
  void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#else
  PA_ALWAYS_INLINE void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#endif

  // See below, the latency of PA_YIELD_PROCESSOR can be as high as ~150
  // cycles. Meanwhile, sleeping costs a few us. Spinning 64 times at 3GHz
  // would cost 150 * 64 / 3e9 ~= 3.2us.
  //
  // This applies to Linux kernels, on x86_64. On ARM we might want to spin
  // more.
  static constexpr int kSpinCount = 64;

#if PA_CONFIG(HAS_FAST_MUTEX)

#if PA_CONFIG(HAS_LINUX_KERNEL)
  void FutexWait();
  void FutexWake();

  static constexpr int kUnlocked = 0;
  static constexpr int kLockedUncontended = 1;
  static constexpr int kLockedContended = 2;
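
  // Lock state sketch for the futex-based path (a summary of how the code in
  // this header uses the constants above; the contended transition happens in
  // LockSlow(), which is defined out of line):
  //   kUnlocked          -> kLockedUncontended  Try() succeeds.
  //   kLockedUncontended -> kLockedContended    A waiter arrives and blocks.
  //   kLocked*           -> kUnlocked           Release(); FutexWake() is
  //                                             called if it was contended.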

  std::atomic<int32_t> state_{kUnlocked};
#elif BUILDFLAG(IS_WIN)
  PA_CHROME_SRWLOCK lock_ = SRWLOCK_INIT;
#elif BUILDFLAG(IS_APPLE)
  os_unfair_lock unfair_lock_ = OS_UNFAIR_LOCK_INIT;
#elif BUILDFLAG(IS_POSIX)
  pthread_mutex_t lock_ = PTHREAD_MUTEX_INITIALIZER;
#elif BUILDFLAG(IS_FUCHSIA)
  sync_mutex lock_;
#endif

#else   // PA_CONFIG(HAS_FAST_MUTEX)
  std::atomic<bool> lock_{false};

  // Spinlock-like fallback.
  PA_ALWAYS_INLINE bool TrySpinLock();
  PA_ALWAYS_INLINE void ReleaseSpinLock();
  void LockSlowSpinLock();
#endif  // PA_CONFIG(HAS_FAST_MUTEX)
};

PA_ALWAYS_INLINE void SpinningMutex::Acquire() {
  // Not marked PA_LIKELY(), as:
  // 1. We don't know how much contention the lock would experience
  // 2. This may lead to weird-looking code layout when inlined into a caller
  // with PA_(UN)LIKELY() annotations.
  if (Try()) {
    return;
  }

  return AcquireSpinThenBlock();
}
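
// A minimal sketch of the spin-then-block path used above. The real
// AcquireSpinThenBlock() is defined in the .cc file; this comment only
// illustrates the intended shape (spin up to kSpinCount times, then block):
//
//   void SpinningMutex::AcquireSpinThenBlock() {
//     for (int tries = 0; tries < kSpinCount; ++tries) {
//       if (Try()) {
//         return;
//       }
//       PA_YIELD_PROCESSOR;  // Cheap hint to the CPU while spinning.
//     }
//     LockSlow();  // Go to the kernel (or OS primitive) and sleep.
//   }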

inline constexpr SpinningMutex::SpinningMutex() = default;

#if PA_CONFIG(HAS_FAST_MUTEX)

#if PA_CONFIG(HAS_LINUX_KERNEL)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  // Using the weak variant of compare_exchange(), which may fail spuriously.
  // On some architectures such as ARM, CAS is typically performed as an
  // LDREX/STREX pair, where the store may fail. In the strong version, there
  // is a loop inserted by the compiler to retry in these cases.
  //
  // Since we are retrying in Acquire() anyway, there is no point having two
  // nested loops.
  int expected = kUnlocked;
  return (state_.load(std::memory_order_relaxed) == expected) &&
         state_.compare_exchange_weak(expected, kLockedUncontended,
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  if (PA_UNLIKELY(state_.exchange(kUnlocked, std::memory_order_release) ==
                  kLockedContended)) {
    // |kLockedContended|: there is a waiter to wake up.
    //
    // Here there is a window where the lock is unlocked, since we just set it
    // to |kUnlocked| above, meaning that another thread can grab the lock in
    // between now and |FutexWake()| waking up a waiter. Aside from potential
    // fairness concerns, this is not an issue, as the newly-awakened thread
    // will check that the lock is still free.
    //
    // There is a small pessimization here though: if we have a single waiter,
    // then when it wakes up, the lock will be set to |kLockedContended|, so
    // when this waiter releases the lock, it will needlessly call
    // |FutexWake()|, even though there are no waiters. This is supported by
    // the kernel, and is what bionic (Android's libc) also does.
    FutexWake();
  }
}
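
// For reference, a minimal sketch of the blocking slow path that pairs with
// the Release() above (the real LockSlow() is defined in the .cc file; this
// is illustrative only): mark the lock contended so that Release() knows to
// call FutexWake(), then sleep in FutexWait() and retry on wake-up.
//
//   void SpinningMutex::LockSlow() {
//     while (state_.exchange(kLockedContended, std::memory_order_acquire) !=
//            kUnlocked) {
//       FutexWait();
//     }
//   }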

#elif BUILDFLAG(IS_WIN)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return !!::TryAcquireSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  ::ReleaseSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}

#elif BUILDFLAG(IS_APPLE)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return os_unfair_lock_trylock(&unfair_lock_);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  return os_unfair_lock_unlock(&unfair_lock_);
}

#elif BUILDFLAG(IS_POSIX)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  int retval = pthread_mutex_trylock(&lock_);
  PA_DCHECK(retval == 0 || retval == EBUSY);
  return retval == 0;
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  int retval = pthread_mutex_unlock(&lock_);
  PA_DCHECK(retval == 0);
}

#elif BUILDFLAG(IS_FUCHSIA)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return sync_mutex_trylock(&lock_) == ZX_OK;
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  sync_mutex_unlock(&lock_);
}

#endif

#else  // PA_CONFIG(HAS_FAST_MUTEX)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  // Possibly faster than CAS. The theory is that if the cacheline is shared,
  // then it can stay shared in the contended case.
  return !lock_.load(std::memory_order_relaxed) &&
         !lock_.exchange(true, std::memory_order_acquire);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  lock_.store(false, std::memory_order_release);
}

PA_ALWAYS_INLINE void SpinningMutex::LockSlow() {
  return LockSlowSpinLock();
}
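
// For reference, a plausible shape for LockSlowSpinLock() (defined out of
// line; this sketch only illustrates the test-and-test-and-set pattern that
// the Try() above relies on, it is not the actual implementation):
//
//   void SpinningMutex::LockSlowSpinLock() {
//     do {
//       // Spin on the plain load first so the cacheline can stay shared
//       // while the lock is held; only attempt the exchange once the lock
//       // looks free.
//       while (lock_.load(std::memory_order_relaxed)) {
//         PA_YIELD_PROCESSOR;
//       }
//     } while (lock_.exchange(true, std::memory_order_acquire));
//   }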

#endif  // PA_CONFIG(HAS_FAST_MUTEX)

}  // namespace partition_alloc::internal

#endif  // PARTITION_ALLOC_SPINNING_MUTEX_H_