// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef PARTITION_ALLOC_SPINNING_MUTEX_H_
#define PARTITION_ALLOC_SPINNING_MUTEX_H_

#include <algorithm>
#include <atomic>

#include "build/build_config.h"
#include "partition_alloc/partition_alloc_base/compiler_specific.h"
#include "partition_alloc/partition_alloc_base/component_export.h"
#include "partition_alloc/partition_alloc_base/thread_annotations.h"
#include "partition_alloc/partition_alloc_check.h"
#include "partition_alloc/partition_alloc_config.h"
#include "partition_alloc/yield_processor.h"

#if BUILDFLAG(IS_WIN)
#include "partition_alloc/partition_alloc_base/win/windows_types.h"
#endif

#if BUILDFLAG(IS_POSIX)
#include <pthread.h>

#include <cerrno>
#endif

#if BUILDFLAG(IS_APPLE)
#include <os/lock.h>
#endif  // BUILDFLAG(IS_APPLE)

#if BUILDFLAG(IS_FUCHSIA)
#include <lib/sync/mutex.h>
#endif

namespace partition_alloc::internal {

// The behavior of this class depends on whether PA_HAS_FAST_MUTEX is defined.
// 1. When it is defined:
//
// Simple spinning lock. It will spin in user space a set number of times
// before going into the kernel to sleep.
//
// This is intended to give "the best of both worlds" between a SpinLock and
// base::Lock:
// - SpinLock: Inlined fast path, no external function calls, just
//   compare-and-swap. Short waits do not go into the kernel. Good behavior in
//   low contention cases.
// - base::Lock: Good behavior in case of contention.
//
// We don't rely on base::Lock, which we could make spin (by calling Try() in a
// loop), as its performance is below that of a custom spinlock on high-level
// benchmarks. Instead, this implements a simple non-recursive mutex on top of
// the futex() syscall on Linux, SRWLock on Windows, os_unfair_lock on macOS,
// sync_mutex on Fuchsia, and pthread_mutex on other POSIX systems. The main
// difference between this and a libc implementation is that it only supports
// the simplest path: private (to a process), non-recursive mutexes with no
// priority inheritance and no timed waits.
//
// As an interesting side-effect that makes it usable in the allocator, this
// code does not make any allocations; locks are small, with a constexpr
// constructor and no destructor.
//
// 2. Otherwise: This is a simple SpinLock, in the sense that it does not have
// any awareness of other threads' behavior.
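//
// Illustrative usage sketch. SpinningMutex is constexpr-constructible, so it
// can be a global; the helpers below (Node, PushFront()) are hypothetical and
// not part of this header:
//
//   SpinningMutex mutex;
//
//   void AppendToList(Node* node) {
//     mutex.Acquire();   // Spins briefly, then blocks if still contended.
//     PushFront(node);
//     mutex.Release();
//   }
//
//   bool TryAppendToList(Node* node) {
//     if (!mutex.Try()) {  // Single inlined attempt, never blocks.
//       return false;
//     }
//     PushFront(node);
//     mutex.Release();
//     return true;
//   }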
class PA_LOCKABLE PA_COMPONENT_EXPORT(PARTITION_ALLOC) SpinningMutex {
 public:
  inline constexpr SpinningMutex();
  PA_ALWAYS_INLINE void Acquire() PA_EXCLUSIVE_LOCK_FUNCTION();
  PA_ALWAYS_INLINE void Release() PA_UNLOCK_FUNCTION();
  PA_ALWAYS_INLINE bool Try() PA_EXCLUSIVE_TRYLOCK_FUNCTION(true);
  void AssertAcquired() const {}  // Not supported.
  void Reinit() PA_UNLOCK_FUNCTION();

 private:
  PA_NOINLINE void AcquireSpinThenBlock() PA_EXCLUSIVE_LOCK_FUNCTION();
#if PA_CONFIG(HAS_FAST_MUTEX)
  void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#else
  PA_ALWAYS_INLINE void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#endif

  // See below, the latency of PA_YIELD_PROCESSOR can be as high as ~150
  // cycles. Meanwhile, sleeping costs a few us. Spinning 64 times at 3GHz
  // would cost 150 * 64 / 3e9 ~= 3.2us.
  //
  // This applies to Linux kernels, on x86_64. On ARM we might want to spin
  // more.
  static constexpr int kSpinCount = 64;

#if PA_CONFIG(HAS_FAST_MUTEX)

#if PA_CONFIG(HAS_LINUX_KERNEL)
  void FutexWait();
  void FutexWake();

  static constexpr int kUnlocked = 0;
  static constexpr int kLockedUncontended = 1;
  static constexpr int kLockedContended = 2;

  std::atomic<int32_t> state_{kUnlocked};
#elif BUILDFLAG(IS_WIN)
  PA_CHROME_SRWLOCK lock_ = SRWLOCK_INIT;
#elif BUILDFLAG(IS_APPLE)
  os_unfair_lock unfair_lock_ = OS_UNFAIR_LOCK_INIT;
#elif BUILDFLAG(IS_POSIX)
  pthread_mutex_t lock_ = PTHREAD_MUTEX_INITIALIZER;
#elif BUILDFLAG(IS_FUCHSIA)
  sync_mutex lock_;
#endif

#else  // PA_CONFIG(HAS_FAST_MUTEX)
  std::atomic<bool> lock_{false};

  // Spinlock-like, fallback.
  PA_ALWAYS_INLINE bool TrySpinLock();
  PA_ALWAYS_INLINE void ReleaseSpinLock();
  void LockSlowSpinLock();
#endif  // PA_CONFIG(HAS_FAST_MUTEX)
};

PA_ALWAYS_INLINE void SpinningMutex::Acquire() {
  // Not marked PA_LIKELY(), as:
  // 1. We don't know how much contention the lock would experience
  // 2. This may lead to weird-looking code layout when inlined into a caller
  //    with PA_(UN)LIKELY() annotations.
  if (Try()) {
    return;
  }

  return AcquireSpinThenBlock();
}
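
// A minimal sketch of the spin-then-block pattern that AcquireSpinThenBlock()
// (defined out of line) is expected to follow; illustrative only, not the
// exact implementation:
//
//   void SpinningMutex::AcquireSpinThenBlock() {
//     for (int i = 0; i < kSpinCount; i++) {
//       if (Try()) {
//         return;
//       }
//       PA_YIELD_PROCESSOR;  // Cheap user-space pause between attempts.
//     }
//     LockSlow();  // Give up spinning; let the OS primitive block us.
//   }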

inline constexpr SpinningMutex::SpinningMutex() = default;

#if PA_CONFIG(HAS_FAST_MUTEX)

#if PA_CONFIG(HAS_LINUX_KERNEL)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  // Using the weak variant of compare_exchange(), which may fail spuriously.
  // On some architectures such as ARM, CAS is typically performed as a
  // LDREX/STREX pair, where the store may fail. In the strong version, there
  // is a loop inserted by the compiler to retry in these cases.
  //
  // Since the slow path in Acquire() retries anyway, there is no point having
  // two nested loops.
  int expected = kUnlocked;
  return (state_.load(std::memory_order_relaxed) == expected) &&
         state_.compare_exchange_weak(expected, kLockedUncontended,
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  if (PA_UNLIKELY(state_.exchange(kUnlocked, std::memory_order_release) ==
                  kLockedContended)) {
    // |kLockedContended|: there is a waiter to wake up.
    //
    // Here there is a window where the lock is unlocked, since we just set it
    // to |kUnlocked| above. Meaning that another thread can grab the lock
    // in-between now and |FutexWake()| waking up a waiter. Aside from
    // potentially reduced fairness, this is not an issue, as the
    // newly-awakened thread will check that the lock is still free.
    //
    // There is a small pessimization here though: if we have a single waiter,
    // then when it wakes up, the lock will be set to |kLockedContended|, so
    // when this waiter releases the lock, it will needlessly call
    // |FutexWake()|, even though there are no waiters. This is supported by
    // the kernel, and is what bionic (Android's libc) also does.
    FutexWake();
  }
}
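
// For reference, the slow path on Linux follows the classic two-state futex
// protocol (see Drepper, "Futexes Are Tricky"). A minimal sketch of what
// LockSlow() is expected to do, not the exact implementation:
//
//   void SpinningMutex::LockSlow() {
//     // Mark the lock as contended, so that Release() knows to call
//     // FutexWake(). If the lock was previously unlocked, we now own it.
//     while (state_.exchange(kLockedContended, std::memory_order_acquire) !=
//            kUnlocked) {
//       // FutexWait() sleeps in the kernel until a FutexWake(), or returns
//       // immediately if |state_| no longer equals |kLockedContended|.
//       FutexWait();
//     }
//   }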

#elif BUILDFLAG(IS_WIN)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return !!::TryAcquireSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  ::ReleaseSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}

#elif BUILDFLAG(IS_APPLE)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return os_unfair_lock_trylock(&unfair_lock_);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  return os_unfair_lock_unlock(&unfair_lock_);
}

#elif BUILDFLAG(IS_POSIX)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  int retval = pthread_mutex_trylock(&lock_);
  PA_DCHECK(retval == 0 || retval == EBUSY);
  return retval == 0;
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  int retval = pthread_mutex_unlock(&lock_);
  PA_DCHECK(retval == 0);
}

#elif BUILDFLAG(IS_FUCHSIA)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  return sync_mutex_trylock(&lock_) == ZX_OK;
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  sync_mutex_unlock(&lock_);
}

#endif


#else  // PA_CONFIG(HAS_FAST_MUTEX)

PA_ALWAYS_INLINE bool SpinningMutex::Try() {
  // Possibly faster than CAS: checking with a relaxed load first means that in
  // the contended case, the cacheline can stay in the shared state rather than
  // being requested for exclusive ownership on every attempt.
  return !lock_.load(std::memory_order_relaxed) &&
         !lock_.exchange(true, std::memory_order_acquire);
}

PA_ALWAYS_INLINE void SpinningMutex::Release() {
  lock_.store(false, std::memory_order_release);
}

PA_ALWAYS_INLINE void SpinningMutex::LockSlow() {
  return LockSlowSpinLock();
}
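
// LockSlowSpinLock() is defined out of line. A minimal sketch of what such a
// fallback is expected to look like (illustrative only; the actual
// implementation may also back off or yield to the scheduler):
//
//   void SpinningMutex::LockSlowSpinLock() {
//     do {
//       // Wait for the lock to look free before attempting the exchange, so
//       // that the cacheline is not bounced around while the lock is held.
//       while (lock_.load(std::memory_order_relaxed)) {
//         PA_YIELD_PROCESSOR;
//       }
//     } while (lock_.exchange(true, std::memory_order_acquire));
//   }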

#endif  // PA_CONFIG(HAS_FAST_MUTEX)

}  // namespace partition_alloc::internal

#endif  // PARTITION_ALLOC_SPINNING_MUTEX_H_