/* xref: /aosp_15_r20/external/pthreadpool/src/threadpool-atomics.h (revision b095b0533730c2930f947df924a4486d266faa1a) */
#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* SSE-specific headers */
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
	#include <xmmintrin.h>
#endif

/* ARM-specific headers */
#if defined(__ARM_ACLE)
	#include <arm_acle.h>
#endif

/* MSVC-specific headers */
#ifdef _MSC_VER
	#include <intrin.h>
#endif


#if defined(__wasm__) && defined(__clang__)
	/*
	 * Clang for the WebAssembly target lacks the stdatomic.h header,
	 * even though it supports the necessary low-level intrinsics.
	 * Thus, we implement pthreadpool atomic functions on top of
	 * low-level Clang-specific interfaces for this target.
	 */

/*
 * Atomic types and operations for Clang targeting WebAssembly,
 * implemented directly on Clang's __c11_atomic_* builtins.
 */
typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
typedef _Atomic(void*)    pthreadpool_atomic_void_p;

/* Returns the 32-bit value stored at address, loaded with relaxed ordering. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_RELAXED);
}

/* Returns the size_t value stored at address, loaded with relaxed ordering. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_RELAXED);
}

/* Returns the pointer stored at address, loaded with relaxed ordering. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return __c11_atomic_load(address, __ATOMIC_RELAXED);
}

/* Returns the 32-bit value stored at address, loaded with acquire ordering. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
}

/* Returns the size_t value stored at address, loaded with acquire ordering. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
}

/* Stores value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELAXED);
}

/* Stores value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELAXED);
}

/* Stores the pointer value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELAXED);
}

/* Stores value to address with release ordering. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELEASE);
}

/* Stores value to address with release ordering. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELEASE);
}

/*
 * Atomically decrements *address by one (relaxed ordering) and
 * returns the value after the decrement.
 */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	/* fetch_sub yields the previous value; subtract one to get the new one. */
	return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
}

/*
 * Atomically decrements *address by one (release ordering) and
 * returns the value after the decrement.
 */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	/* fetch_sub yields the previous value; subtract one to get the new one. */
	return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
}

/*
 * Atomically decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
	while (actual_value != 0) {
		/* On failure the builtin refreshes actual_value with the current contents. */
		if (__c11_atomic_compare_exchange_weak(
			value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		{
			return true;
		}
	}
	return false;
}

/* Issues an acquire thread fence. */
static inline void pthreadpool_fence_acquire(void) {
	__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
}

/* Issues a release thread fence. */
static inline void pthreadpool_fence_release(void) {
	__c11_atomic_thread_fence(__ATOMIC_RELEASE);
}
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
#include <stdatomic.h>

/* Atomic types and operations implemented on C11 <stdatomic.h>. */
typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
typedef _Atomic(void*)    pthreadpool_atomic_void_p;

/* Returns the 32-bit value stored at address, loaded with relaxed ordering. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Returns the size_t value stored at address, loaded with relaxed ordering. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Returns the pointer stored at address, loaded with relaxed ordering. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Returns the 32-bit value stored at address, loaded with acquire ordering. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return atomic_load_explicit(address, memory_order_acquire);
}

/* Returns the size_t value stored at address, loaded with acquire ordering. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_load_explicit(address, memory_order_acquire);
}

/* Stores value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Stores value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Stores the pointer value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Stores value to address with release ordering. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	atomic_store_explicit(address, value, memory_order_release);
}

/* Stores value to address with release ordering. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	atomic_store_explicit(address, value, memory_order_release);
}

/*
 * Atomically decrements *address by one (relaxed ordering) and
 * returns the value after the decrement.
 */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	/* fetch_sub yields the previous value; subtract one to get the new one. */
	return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
}

/*
 * Atomically decrements *address by one (release ordering) and
 * returns the value after the decrement.
 */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	/* fetch_sub yields the previous value; subtract one to get the new one. */
	return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
}

/*
 * Atomically decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	#if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
		/* Clang on ARM: hand-written exclusive load / exclusive store loop. */
		size_t actual_value;
		do {
			actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
			if (actual_value == 0) {
				/* Nothing to decrement: clear the exclusive monitor before leaving. */
				__builtin_arm_clrex();
				return false;
			}
			/* A non-zero strex result means the exclusive store failed; retry. */
		} while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
		return true;
	#else
		size_t actual_value = pthreadpool_load_relaxed_size_t(value);
		while (actual_value != 0) {
			/* On failure the CAS refreshes actual_value with the current contents. */
			if (atomic_compare_exchange_weak_explicit(
				value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed))
			{
				return true;
			}
		}
		return false;
	#endif
}

/* Issues an acquire thread fence. */
static inline void pthreadpool_fence_acquire(void) {
	atomic_thread_fence(memory_order_acquire);
}

/* Issues a release thread fence. */
static inline void pthreadpool_fence_release(void) {
	atomic_thread_fence(memory_order_release);
}
#elif defined(__GNUC__)
/*
 * Fallback for GNU-compatible compilers without C11 atomics:
 * volatile-qualified plain types combined with the legacy __sync builtins.
 */
typedef uint32_t volatile pthreadpool_atomic_uint32_t;
typedef size_t volatile   pthreadpool_atomic_size_t;
typedef void* volatile    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address (plain volatile read, no barrier). */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return *address;
}

/* Returns the size_t value at address (plain volatile read, no barrier). */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return *address;
}

/* Returns the pointer at address (plain volatile read, no barrier). */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return *address;
}

/*
 * Returns the 32-bit value at address with acquire semantics.
 * A volatile read alone orders nothing against non-volatile accesses, so a
 * full barrier follows the load to keep later accesses from moving before it.
 */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	const uint32_t value = *address;
	__sync_synchronize();
	return value;
}

/*
 * Returns the size_t value at address with acquire semantics.
 * A volatile read alone orders nothing against non-volatile accesses, so a
 * full barrier follows the load to keep later accesses from moving before it.
 */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	const size_t value = *address;
	__sync_synchronize();
	return value;
}

/* Stores value to address (plain volatile write, no barrier). */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	*address = value;
}

/* Stores value to address (plain volatile write, no barrier). */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	*address = value;
}

/* Stores the pointer value to address (plain volatile write, no barrier). */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	*address = value;
}

/*
 * Stores value to address with release semantics.
 * A full barrier precedes the store so earlier accesses cannot be
 * reordered after it by the compiler or the hardware.
 */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__sync_synchronize();
	*address = value;
}

/*
 * Stores value to address with release semantics.
 * A full barrier precedes the store so earlier accesses cannot be
 * reordered after it by the compiler or the hardware.
 */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__sync_synchronize();
	*address = value;
}

/* Atomically decrements *address by one and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __sync_sub_and_fetch(address, 1);
}

/* Atomically decrements *address by one and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __sync_sub_and_fetch(address, 1);
}

/*
 * Atomically decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = *value;
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/* The builtin returns the contents of *value before the operation. */
		actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/* Acquire fence, implemented with a full memory barrier. */
static inline void pthreadpool_fence_acquire(void) {
	__sync_synchronize();
}

/* Release fence, implemented with a full memory barrier. */
static inline void pthreadpool_fence_release(void) {
	__sync_synchronize();
}
#elif defined(_MSC_VER) && defined(_M_X64)
355*b095b053SXin Li 	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
356*b095b053SXin Li 	typedef volatile size_t   pthreadpool_atomic_size_t;
357*b095b053SXin Li 	typedef void *volatile    pthreadpool_atomic_void_p;
358*b095b053SXin Li 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)359*b095b053SXin Li 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
360*b095b053SXin Li 		pthreadpool_atomic_uint32_t* address)
361*b095b053SXin Li 	{
362*b095b053SXin Li 		return *address;
363*b095b053SXin Li 	}
364*b095b053SXin Li 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)365*b095b053SXin Li 	static inline size_t pthreadpool_load_relaxed_size_t(
366*b095b053SXin Li 		pthreadpool_atomic_size_t* address)
367*b095b053SXin Li 	{
368*b095b053SXin Li 		return *address;
369*b095b053SXin Li 	}
370*b095b053SXin Li 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)371*b095b053SXin Li 	static inline void* pthreadpool_load_relaxed_void_p(
372*b095b053SXin Li 		pthreadpool_atomic_void_p* address)
373*b095b053SXin Li 	{
374*b095b053SXin Li 		return *address;
375*b095b053SXin Li 	}
376*b095b053SXin Li 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)377*b095b053SXin Li 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
378*b095b053SXin Li 		pthreadpool_atomic_uint32_t* address)
379*b095b053SXin Li 	{
380*b095b053SXin Li 		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
381*b095b053SXin Li 		const uint32_t value = *address;
382*b095b053SXin Li 		_ReadBarrier();
383*b095b053SXin Li 		return value;
384*b095b053SXin Li 	}
385*b095b053SXin Li 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)386*b095b053SXin Li 	static inline size_t pthreadpool_load_acquire_size_t(
387*b095b053SXin Li 		pthreadpool_atomic_size_t* address)
388*b095b053SXin Li 	{
389*b095b053SXin Li 		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
390*b095b053SXin Li 		const size_t value = *address;
391*b095b053SXin Li 		_ReadBarrier();
392*b095b053SXin Li 		return value;
393*b095b053SXin Li 	}
394*b095b053SXin Li 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)395*b095b053SXin Li 	static inline void pthreadpool_store_relaxed_uint32_t(
396*b095b053SXin Li 		pthreadpool_atomic_uint32_t* address,
397*b095b053SXin Li 		uint32_t value)
398*b095b053SXin Li 	{
399*b095b053SXin Li 		*address = value;
400*b095b053SXin Li 	}
401*b095b053SXin Li 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)402*b095b053SXin Li 	static inline void pthreadpool_store_relaxed_size_t(
403*b095b053SXin Li 		pthreadpool_atomic_size_t* address,
404*b095b053SXin Li 		size_t value)
405*b095b053SXin Li 	{
406*b095b053SXin Li 		*address = value;
407*b095b053SXin Li 	}
408*b095b053SXin Li 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)409*b095b053SXin Li 	static inline void pthreadpool_store_relaxed_void_p(
410*b095b053SXin Li 		pthreadpool_atomic_void_p* address,
411*b095b053SXin Li 		void* value)
412*b095b053SXin Li 	{
413*b095b053SXin Li 		*address = value;
414*b095b053SXin Li 	}
415*b095b053SXin Li 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)416*b095b053SXin Li 	static inline void pthreadpool_store_release_uint32_t(
417*b095b053SXin Li 		pthreadpool_atomic_uint32_t* address,
418*b095b053SXin Li 		uint32_t value)
419*b095b053SXin Li 	{
420*b095b053SXin Li 		/* x86-64 stores always have release semantics; use only a compiler barrier */
421*b095b053SXin Li 		_WriteBarrier();
422*b095b053SXin Li 		*address = value;
423*b095b053SXin Li 	}
424*b095b053SXin Li 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)425*b095b053SXin Li 	static inline void pthreadpool_store_release_size_t(
426*b095b053SXin Li 		pthreadpool_atomic_size_t* address,
427*b095b053SXin Li 		size_t value)
428*b095b053SXin Li 	{
429*b095b053SXin Li 		/* x86-64 stores always have release semantics; use only a compiler barrier */
430*b095b053SXin Li 		_WriteBarrier();
431*b095b053SXin Li 		*address = value;
432*b095b053SXin Li 	}
433*b095b053SXin Li 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)434*b095b053SXin Li 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
435*b095b053SXin Li 		pthreadpool_atomic_size_t* address)
436*b095b053SXin Li 	{
437*b095b053SXin Li 		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
438*b095b053SXin Li 	}
439*b095b053SXin Li 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)440*b095b053SXin Li 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
441*b095b053SXin Li 		pthreadpool_atomic_size_t* address)
442*b095b053SXin Li 	{
443*b095b053SXin Li 		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
444*b095b053SXin Li 	}
445*b095b053SXin Li 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)446*b095b053SXin Li 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
447*b095b053SXin Li 		pthreadpool_atomic_size_t* value)
448*b095b053SXin Li 	{
449*b095b053SXin Li 		size_t actual_value = *value;
450*b095b053SXin Li 		while (actual_value != 0) {
451*b095b053SXin Li 			const size_t new_value = actual_value - 1;
452*b095b053SXin Li 			const size_t expected_value = actual_value;
453*b095b053SXin Li 			actual_value = _InterlockedCompareExchange64(
454*b095b053SXin Li 				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
455*b095b053SXin Li 			if (actual_value == expected_value) {
456*b095b053SXin Li 				return true;
457*b095b053SXin Li 			}
458*b095b053SXin Li 		}
459*b095b053SXin Li 		return false;
460*b095b053SXin Li 	}
461*b095b053SXin Li 
pthreadpool_fence_acquire()462*b095b053SXin Li 	static inline void pthreadpool_fence_acquire() {
463*b095b053SXin Li 		_mm_lfence();
464*b095b053SXin Li 		_ReadBarrier();
465*b095b053SXin Li 	}
466*b095b053SXin Li 
pthreadpool_fence_release()467*b095b053SXin Li 	static inline void pthreadpool_fence_release() {
468*b095b053SXin Li 		_WriteBarrier();
469*b095b053SXin Li 		_mm_sfence();
470*b095b053SXin Li 	}
#elif defined(_MSC_VER) && defined(_M_IX86)
/*
 * Atomic types and operations for MSVC on 32-bit x86:
 * volatile-qualified plain types combined with MSVC intrinsics.
 */
typedef volatile uint32_t pthreadpool_atomic_uint32_t;
typedef volatile size_t   pthreadpool_atomic_size_t;
typedef void *volatile    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address (plain volatile read). */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return *address;
}

/* Returns the size_t value at address (plain volatile read). */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return *address;
}

/* Returns the pointer at address (plain volatile read). */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return *address;
}

/* Returns the 32-bit value at address with acquire semantics. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	/* x86 loads always have acquire semantics; use only a compiler barrier */
	const uint32_t value = *address;
	_ReadBarrier();
	return value;
}

/* Returns the size_t value at address with acquire semantics. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	/* x86 loads always have acquire semantics; use only a compiler barrier */
	const size_t value = *address;
	_ReadBarrier();
	return value;
}

/* Stores value to address (plain volatile write). */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	*address = value;
}

/* Stores value to address (plain volatile write). */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	*address = value;
}

/* Stores the pointer value to address (plain volatile write). */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	*address = value;
}

/* Stores value to address with release semantics. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	/* x86 stores always have release semantics; use only a compiler barrier */
	_WriteBarrier();
	*address = value;
}

/* Stores value to address with release semantics. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	/* x86 stores always have release semantics; use only a compiler barrier */
	_WriteBarrier();
	*address = value;
}

/* Atomically decrements *address by one and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement((volatile long*) address);
}

/* Atomically decrements *address by one and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement((volatile long*) address);
}

/*
 * Atomically decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = *value;
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/*
		 * The intrinsic returns the previous contents as a signed long;
		 * convert explicitly so the comparison below stays unsigned.
		 */
		actual_value = (size_t) _InterlockedCompareExchange(
			(volatile long*) value, (long) new_value, (long) expected_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/*
 * Acquire fence: LFENCE instruction followed by a compiler read barrier,
 * matching the pairing used by the x86-64 implementation in this file.
 */
static inline void pthreadpool_fence_acquire(void) {
	_mm_lfence();
	_ReadBarrier();
}

/*
 * Release fence: compiler write barrier followed by an SFENCE instruction,
 * matching the pairing used by the x86-64 implementation in this file.
 */
static inline void pthreadpool_fence_release(void) {
	_WriteBarrier();
	_mm_sfence();
}
586*b095b053SXin Li #elif defined(_MSC_VER) && defined(_M_ARM64)
/*
 * Storage types for atomically accessed values in this branch: ordinary
 * volatile-qualified objects, read and written through compiler intrinsics.
 */
typedef uint32_t volatile pthreadpool_atomic_uint32_t;
typedef size_t volatile   pthreadpool_atomic_size_t;
typedef void* volatile    pthreadpool_atomic_void_p;
590*b095b053SXin Li 
/*
 * Reads the 32-bit value at `address` with __iso_volatile_load32 and
 * returns it as uint32_t. No explicit barrier is issued here.
 */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	const __int32 raw = __iso_volatile_load32((const volatile __int32*) address);
	return (uint32_t) raw;
}
596*b095b053SXin Li 
/*
 * Reads the value at `address` as a 64-bit quantity with
 * __iso_volatile_load64 and returns it as size_t. No explicit barrier is
 * issued here.
 */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	const __int64 raw = __iso_volatile_load64((const volatile __int64*) address);
	return (size_t) raw;
}
602*b095b053SXin Li 
/*
 * Reads the pointer stored at `address` as a 64-bit quantity with
 * __iso_volatile_load64 and converts it back to void*. No explicit barrier
 * is issued here.
 */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	const __int64 bits = __iso_volatile_load64((const volatile __int64*) address);
	return (void*) bits;
}
608*b095b053SXin Li 
/*
 * Acquire load of the 32-bit value at `address`, performed with the
 * __ldar32 intrinsic; the result is returned as uint32_t.
 */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	const unsigned __int32 raw = __ldar32((volatile unsigned __int32*) address);
	return (uint32_t) raw;
}
614*b095b053SXin Li 
/*
 * Acquire load of the value at `address`, performed as a 64-bit access with
 * the __ldar64 intrinsic; the result is returned as size_t.
 */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	const unsigned __int64 raw = __ldar64((volatile unsigned __int64*) address);
	return (size_t) raw;
}
620*b095b053SXin Li 
/*
 * Writes `value` to the 32-bit location at `address` with
 * __iso_volatile_store32. No explicit barrier is issued here.
 */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	volatile __int32* const slot = (volatile __int32*) address;
	__iso_volatile_store32(slot, (__int32) value);
}
627*b095b053SXin Li 
/*
 * Writes `value` to the location at `address` as a 64-bit store with
 * __iso_volatile_store64. No explicit barrier is issued here.
 */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	volatile __int64* const slot = (volatile __int64*) address;
	__iso_volatile_store64(slot, (__int64) value);
}
634*b095b053SXin Li 
/*
 * Writes the pointer `value` to the location at `address` as a 64-bit store
 * with __iso_volatile_store64. No explicit barrier is issued here.
 */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	volatile __int64* const slot = (volatile __int64*) address;
	__iso_volatile_store64(slot, (__int64) value);
}
641*b095b053SXin Li 
/*
 * Release store of `value` to the 32-bit location at `address`:
 * a _WriteBarrier() followed by a store through the __stlr32 intrinsic.
 */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	/* Barrier first, so it precedes the store exactly as before. */
	_WriteBarrier();
	unsigned __int32 volatile* const slot = (unsigned __int32 volatile*) address;
	__stlr32(slot, (unsigned __int32) value);
}
649*b095b053SXin Li 
/*
 * Release store of `value` to the location at `address`, performed as a
 * 64-bit access: a _WriteBarrier() followed by a store through the
 * __stlr64 intrinsic.
 */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	/* Barrier first, so it precedes the store exactly as before. */
	_WriteBarrier();
	unsigned __int64 volatile* const slot = (unsigned __int64 volatile*) address;
	__stlr64(slot, (unsigned __int64) value);
}
657*b095b053SXin Li 
/*
 * Atomically subtracts one from the 64-bit counter at `address` using the
 * `_nf` ("no fence") form of _InterlockedDecrement64 and returns the
 * decremented value.
 */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	volatile __int64* const counter = (volatile __int64*) address;
	const __int64 decremented = _InterlockedDecrement64_nf(counter);
	return (size_t) decremented;
}
663*b095b053SXin Li 
/*
 * Atomically subtracts one from the 64-bit counter at `address` using the
 * `_rel` (release) form of _InterlockedDecrement64 and returns the
 * decremented value.
 */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	volatile __int64* const counter = (volatile __int64*) address;
	const __int64 decremented = _InterlockedDecrement64_rel(counter);
	return (size_t) decremented;
}
669*b095b053SXin Li 
/*
 * Decrements the 64-bit counter at `value` unless it is zero.
 *
 * Returns true if this call replaced a non-zero counter with counter - 1,
 * and false once the counter is observed to be zero. The compare-exchange
 * uses the `_nf` ("no fence") form of the intrinsic.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t observed = (size_t) __iso_volatile_load64((const volatile __int64*) value);
	for (;;) {
		if (observed == 0) {
			return false;
		}
		/* The compare-exchange returns the value that was stored before the call. */
		const size_t previous = (size_t) _InterlockedCompareExchange64_nf(
			(volatile __int64*) value, (__int64) (observed - 1), (__int64) observed);
		if (previous == observed) {
			return true;
		}
		/* Another thread changed the counter; retry from the value it left. */
		observed = previous;
	}
}
685*b095b053SXin Li 
pthreadpool_fence_acquire()686*b095b053SXin Li 	static inline void pthreadpool_fence_acquire() {
687*b095b053SXin Li 		__dmb(_ARM64_BARRIER_ISHLD);
688*b095b053SXin Li 		_ReadBarrier();
689*b095b053SXin Li 	}
690*b095b053SXin Li 
pthreadpool_fence_release()691*b095b053SXin Li 	static inline void pthreadpool_fence_release() {
692*b095b053SXin Li 		_WriteBarrier();
693*b095b053SXin Li 		__dmb(_ARM64_BARRIER_ISH);
694*b095b053SXin Li 	}
695*b095b053SXin Li #elif defined(_MSC_VER) && defined(_M_ARM)
/*
 * Storage types for atomically accessed values in this branch: ordinary
 * volatile-qualified objects, read and written through compiler intrinsics.
 */
typedef uint32_t volatile pthreadpool_atomic_uint32_t;
typedef size_t volatile   pthreadpool_atomic_size_t;
typedef void* volatile    pthreadpool_atomic_void_p;
699*b095b053SXin Li 
/*
 * Reads the 32-bit value at `address` with __iso_volatile_load32 and
 * returns it as uint32_t. No explicit barrier is issued here.
 */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	const __int32 raw = __iso_volatile_load32((const volatile __int32*) address);
	return (uint32_t) raw;
}
705*b095b053SXin Li 
/*
 * Reads the value at `address` as a 32-bit quantity with
 * __iso_volatile_load32 and returns it as size_t. No explicit barrier is
 * issued here.
 */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	const __int32 raw = __iso_volatile_load32((const volatile __int32*) address);
	return (size_t) raw;
}
711*b095b053SXin Li 
/*
 * Reads the pointer stored at `address` as a 32-bit quantity with
 * __iso_volatile_load32 and converts it back to void*. No explicit barrier
 * is issued here.
 */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	const __int32 bits = __iso_volatile_load32((const volatile __int32*) address);
	return (void*) bits;
}
717*b095b053SXin Li 
/*
 * Acquire load of the 32-bit value at `address`: a plain
 * __iso_volatile_load32, then a DMB (_ARM_BARRIER_ISH) and the
 * _ReadBarrier() compiler barrier, in that order.
 */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	const __int32 raw = __iso_volatile_load32((const volatile __int32*) address);
	const uint32_t loaded = (uint32_t) raw;
	/* Barriers come after the load, as in the load-then-fence sequence above. */
	__dmb(_ARM_BARRIER_ISH);
	_ReadBarrier();
	return loaded;
}
726*b095b053SXin Li 
/*
 * Acquire load of the value at `address`, read as a 32-bit quantity:
 * a plain __iso_volatile_load32, then a DMB (_ARM_BARRIER_ISH) and the
 * _ReadBarrier() compiler barrier, in that order.
 */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	const __int32 raw = __iso_volatile_load32((const volatile __int32*) address);
	const size_t loaded = (size_t) raw;
	/* Barriers come after the load, as in the load-then-fence sequence above. */
	__dmb(_ARM_BARRIER_ISH);
	_ReadBarrier();
	return loaded;
}
735*b095b053SXin Li 
/*
 * Writes `value` to the 32-bit location at `address` with
 * __iso_volatile_store32. No explicit barrier is issued here.
 */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	volatile __int32* const slot = (volatile __int32*) address;
	__iso_volatile_store32(slot, (__int32) value);
}
742*b095b053SXin Li 
/*
 * Writes `value` to the location at `address` as a 32-bit store with
 * __iso_volatile_store32. No explicit barrier is issued here.
 */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	volatile __int32* const slot = (volatile __int32*) address;
	__iso_volatile_store32(slot, (__int32) value);
}
749*b095b053SXin Li 
/*
 * Writes the pointer `value` to the location at `address` as a 32-bit store
 * with __iso_volatile_store32. No explicit barrier is issued here.
 */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	volatile __int32* const slot = (volatile __int32*) address;
	__iso_volatile_store32(slot, (__int32) value);
}
756*b095b053SXin Li 
/*
 * Release store of `value` to the 32-bit location at `address`:
 * the _WriteBarrier() compiler barrier and a DMB (_ARM_BARRIER_ISH) are
 * issued first, then the value is written with __iso_volatile_store32.
 */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	/* Both barriers precede the store. */
	_WriteBarrier();
	__dmb(_ARM_BARRIER_ISH);
	volatile __int32* const slot = (volatile __int32*) address;
	__iso_volatile_store32(slot, (__int32) value);
}
765*b095b053SXin Li 
/*
 * Release store of `value` to the location at `address`, written as a
 * 32-bit quantity: the _WriteBarrier() compiler barrier and a DMB
 * (_ARM_BARRIER_ISH) are issued first, then the value is written with
 * __iso_volatile_store32.
 */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	/* Both barriers precede the store. */
	_WriteBarrier();
	__dmb(_ARM_BARRIER_ISH);
	volatile __int32* const slot = (volatile __int32*) address;
	__iso_volatile_store32(slot, (__int32) value);
}
774*b095b053SXin Li 
/*
 * Atomically subtracts one from the counter at `address` using the `_nf`
 * ("no fence") form of _InterlockedDecrement and returns the decremented
 * value. The counter is accessed through a `volatile long*`.
 */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	volatile long* const counter = (volatile long*) address;
	const long decremented = _InterlockedDecrement_nf(counter);
	return (size_t) decremented;
}
780*b095b053SXin Li 
/*
 * Atomically subtracts one from the counter at `address` using the `_rel`
 * (release) form of _InterlockedDecrement and returns the decremented
 * value. The counter is accessed through a `volatile long*`.
 */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	volatile long* const counter = (volatile long*) address;
	const long decremented = _InterlockedDecrement_rel(counter);
	return (size_t) decremented;
}
786*b095b053SXin Li 
/*
 * Decrements the counter at `value` unless it is zero.
 *
 * Returns true if this call replaced a non-zero counter with counter - 1,
 * and false once the counter is observed to be zero. The compare-exchange
 * uses the `_nf` ("no fence") form of the intrinsic.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t observed = (size_t) __iso_volatile_load32((const volatile __int32*) value);
	for (;;) {
		if (observed == 0) {
			return false;
		}
		/* The compare-exchange returns the value that was stored before the call. */
		const size_t previous = (size_t) _InterlockedCompareExchange_nf(
			(volatile long*) value, (long) (observed - 1), (long) observed);
		if (previous == observed) {
			return true;
		}
		/* Another thread changed the counter; retry from the value it left. */
		observed = previous;
	}
}
802*b095b053SXin Li 
pthreadpool_fence_acquire()803*b095b053SXin Li 	static inline void pthreadpool_fence_acquire() {
804*b095b053SXin Li 		__dmb(_ARM_BARRIER_ISH);
805*b095b053SXin Li 		_ReadBarrier();
806*b095b053SXin Li 	}
807*b095b053SXin Li 
pthreadpool_fence_release()808*b095b053SXin Li 	static inline void pthreadpool_fence_release() {
809*b095b053SXin Li 		_WriteBarrier();
810*b095b053SXin Li 		__dmb(_ARM_BARRIER_ISH);
811*b095b053SXin Li 	}
812*b095b053SXin Li #else
813*b095b053SXin Li 	#error "Platform-specific implementation of threadpool-atomics.h required"
814*b095b053SXin Li #endif
815*b095b053SXin Li 
816*b095b053SXin Li #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
/*
 * Spin-wait hint for x86 targets, issued through the _mm_pause() intrinsic.
 * Declared with a (void) parameter list so that it is a real prototype in C.
 */
static inline void pthreadpool_yield(void) {
	_mm_pause();
}
820*b095b053SXin Li #elif defined(__ARM_ACLE) || defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
/*
 * Spin-wait hint for ARM targets that provide the __yield() intrinsic
 * (ACLE, or MSVC targeting ARM/ARM64).
 * Declared with a (void) parameter list so that it is a real prototype in C.
 */
static inline void pthreadpool_yield(void) {
	__yield();
}
824*b095b053SXin Li #elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) || (defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__))
/*
 * Spin-wait hint for GNU-compatible ARM compilers: emits the `yield`
 * instruction through inline assembly.
 * Declared with a (void) parameter list so that it is a real prototype in C.
 */
static inline void pthreadpool_yield(void) {
	__asm__ __volatile__("yield");
}
828*b095b053SXin Li #else
/*
 * Fallback for targets without a dedicated spin-wait hint: issues an
 * acquire fence instead.
 * Declared with a (void) parameter list so that it is a real prototype in C.
 */
static inline void pthreadpool_yield(void) {
	pthreadpool_fence_acquire();
}
832*b095b053SXin Li #endif
833