1*b095b053SXin Li #pragma once 2*b095b053SXin Li 3*b095b053SXin Li #include <stdbool.h> 4*b095b053SXin Li #include <stddef.h> 5*b095b053SXin Li #include <stdint.h> 6*b095b053SXin Li 7*b095b053SXin Li /* SSE-specific headers */ 8*b095b053SXin Li #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) 9*b095b053SXin Li #include <xmmintrin.h> 10*b095b053SXin Li #endif 11*b095b053SXin Li 12*b095b053SXin Li /* ARM-specific headers */ 13*b095b053SXin Li #if defined(__ARM_ACLE) 14*b095b053SXin Li #include <arm_acle.h> 15*b095b053SXin Li #endif 16*b095b053SXin Li 17*b095b053SXin Li /* MSVC-specific headers */ 18*b095b053SXin Li #ifdef _MSC_VER 19*b095b053SXin Li #include <intrin.h> 20*b095b053SXin Li #endif 21*b095b053SXin Li 22*b095b053SXin Li 23*b095b053SXin Li #if defined(__wasm__) && defined(__clang__) 24*b095b053SXin Li /* 25*b095b053SXin Li * Clang for WebAssembly target lacks stdatomic.h header, 26*b095b053SXin Li * even though it supports the necessary low-level intrinsics. 27*b095b053SXin Li * Thus, we implement pthreadpool atomic functions on top of 28*b095b053SXin Li * low-level Clang-specific interfaces for this target. 29*b095b053SXin Li */ 30*b095b053SXin Li 31*b095b053SXin Li typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t; 32*b095b053SXin Li typedef _Atomic(size_t) pthreadpool_atomic_size_t; 33*b095b053SXin Li typedef _Atomic(void*) pthreadpool_atomic_void_p; 34*b095b053SXin Li pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)35*b095b053SXin Li static inline uint32_t pthreadpool_load_relaxed_uint32_t( 36*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 37*b095b053SXin Li { 38*b095b053SXin Li return __c11_atomic_load(address, __ATOMIC_RELAXED); 39*b095b053SXin Li } 40*b095b053SXin Li pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)41*b095b053SXin Li static inline size_t pthreadpool_load_relaxed_size_t( 42*b095b053SXin Li pthreadpool_atomic_size_t* address) 43*b095b053SXin Li { 44*b095b053SXin Li return __c11_atomic_load(address, __ATOMIC_RELAXED); 45*b095b053SXin Li } 46*b095b053SXin Li pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)47*b095b053SXin Li static inline void* pthreadpool_load_relaxed_void_p( 48*b095b053SXin Li pthreadpool_atomic_void_p* address) 49*b095b053SXin Li { 50*b095b053SXin Li return __c11_atomic_load(address, __ATOMIC_RELAXED); 51*b095b053SXin Li } 52*b095b053SXin Li pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)53*b095b053SXin Li static inline uint32_t pthreadpool_load_acquire_uint32_t( 54*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 55*b095b053SXin Li { 56*b095b053SXin Li return __c11_atomic_load(address, __ATOMIC_ACQUIRE); 57*b095b053SXin Li } 58*b095b053SXin Li pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)59*b095b053SXin Li static inline size_t pthreadpool_load_acquire_size_t( 60*b095b053SXin Li pthreadpool_atomic_size_t* address) 61*b095b053SXin Li { 62*b095b053SXin Li return __c11_atomic_load(address, __ATOMIC_ACQUIRE); 63*b095b053SXin Li } 64*b095b053SXin Li pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)65*b095b053SXin Li static inline void pthreadpool_store_relaxed_uint32_t( 66*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 67*b095b053SXin Li uint32_t value) 68*b095b053SXin Li { 69*b095b053SXin Li __c11_atomic_store(address, value, __ATOMIC_RELAXED); 70*b095b053SXin Li } 71*b095b053SXin Li pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)72*b095b053SXin Li static inline void pthreadpool_store_relaxed_size_t( 73*b095b053SXin Li pthreadpool_atomic_size_t* address, 74*b095b053SXin Li size_t value) 75*b095b053SXin Li { 76*b095b053SXin Li __c11_atomic_store(address, value, __ATOMIC_RELAXED); 77*b095b053SXin Li } 78*b095b053SXin Li pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)79*b095b053SXin Li static inline void pthreadpool_store_relaxed_void_p( 80*b095b053SXin Li pthreadpool_atomic_void_p* address, 81*b095b053SXin Li void* value) 82*b095b053SXin Li { 83*b095b053SXin Li __c11_atomic_store(address, value, __ATOMIC_RELAXED); 84*b095b053SXin Li } 85*b095b053SXin Li pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)86*b095b053SXin Li static inline void pthreadpool_store_release_uint32_t( 87*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 88*b095b053SXin Li uint32_t value) 89*b095b053SXin Li { 90*b095b053SXin Li __c11_atomic_store(address, value, __ATOMIC_RELEASE); 91*b095b053SXin Li } 92*b095b053SXin Li pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)93*b095b053SXin Li static inline void pthreadpool_store_release_size_t( 94*b095b053SXin Li pthreadpool_atomic_size_t* address, 95*b095b053SXin Li size_t value) 96*b095b053SXin Li { 97*b095b053SXin Li __c11_atomic_store(address, value, __ATOMIC_RELEASE); 98*b095b053SXin Li } 99*b095b053SXin Li pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)100*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( 101*b095b053SXin Li pthreadpool_atomic_size_t* address) 102*b095b053SXin Li { 103*b095b053SXin Li return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1; 104*b095b053SXin Li } 105*b095b053SXin Li pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)106*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_release_size_t( 107*b095b053SXin Li pthreadpool_atomic_size_t* address) 108*b095b053SXin Li { 109*b095b053SXin Li return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1; 110*b095b053SXin Li } 111*b095b053SXin Li pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)112*b095b053SXin Li static inline bool pthreadpool_try_decrement_relaxed_size_t( 113*b095b053SXin Li pthreadpool_atomic_size_t* value) 114*b095b053SXin Li { 115*b095b053SXin Li size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED); 116*b095b053SXin Li while (actual_value != 0) { 117*b095b053SXin Li if (__c11_atomic_compare_exchange_weak( 118*b095b053SXin Li value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) 119*b095b053SXin Li { 120*b095b053SXin Li return true; 121*b095b053SXin Li } 122*b095b053SXin Li } 123*b095b053SXin Li return false; 124*b095b053SXin Li } 125*b095b053SXin Li pthreadpool_fence_acquire()126*b095b053SXin Li static inline void pthreadpool_fence_acquire() { 127*b095b053SXin Li __c11_atomic_thread_fence(__ATOMIC_ACQUIRE); 128*b095b053SXin Li } 129*b095b053SXin Li pthreadpool_fence_release()130*b095b053SXin Li static inline void pthreadpool_fence_release() { 131*b095b053SXin Li __c11_atomic_thread_fence(__ATOMIC_RELEASE); 132*b095b053SXin Li } 133*b095b053SXin Li #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) 134*b095b053SXin Li #include <stdatomic.h> 135*b095b053SXin Li 136*b095b053SXin Li typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t; 137*b095b053SXin Li typedef _Atomic(size_t) pthreadpool_atomic_size_t; 138*b095b053SXin Li typedef _Atomic(void*) pthreadpool_atomic_void_p; 139*b095b053SXin Li pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)140*b095b053SXin Li static inline uint32_t pthreadpool_load_relaxed_uint32_t( 141*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 142*b095b053SXin Li { 143*b095b053SXin Li return atomic_load_explicit(address, memory_order_relaxed); 144*b095b053SXin Li } 145*b095b053SXin Li pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)146*b095b053SXin Li static inline size_t pthreadpool_load_relaxed_size_t( 147*b095b053SXin Li pthreadpool_atomic_size_t* address) 148*b095b053SXin Li { 149*b095b053SXin Li return atomic_load_explicit(address, memory_order_relaxed); 150*b095b053SXin Li } 151*b095b053SXin Li pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)152*b095b053SXin Li static inline void* pthreadpool_load_relaxed_void_p( 153*b095b053SXin Li pthreadpool_atomic_void_p* address) 154*b095b053SXin Li { 155*b095b053SXin Li return atomic_load_explicit(address, memory_order_relaxed); 156*b095b053SXin Li } 157*b095b053SXin Li pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)158*b095b053SXin Li static inline uint32_t pthreadpool_load_acquire_uint32_t( 159*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 160*b095b053SXin Li { 161*b095b053SXin Li return atomic_load_explicit(address, memory_order_acquire); 162*b095b053SXin Li } 163*b095b053SXin Li pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)164*b095b053SXin Li static inline size_t pthreadpool_load_acquire_size_t( 165*b095b053SXin Li pthreadpool_atomic_size_t* address) 166*b095b053SXin Li { 167*b095b053SXin Li return atomic_load_explicit(address, memory_order_acquire); 168*b095b053SXin Li } 169*b095b053SXin Li pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)170*b095b053SXin Li static inline void pthreadpool_store_relaxed_uint32_t( 171*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 172*b095b053SXin Li uint32_t value) 173*b095b053SXin Li { 174*b095b053SXin Li atomic_store_explicit(address, value, memory_order_relaxed); 175*b095b053SXin Li } 176*b095b053SXin Li pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)177*b095b053SXin Li static inline void pthreadpool_store_relaxed_size_t( 178*b095b053SXin Li pthreadpool_atomic_size_t* address, 179*b095b053SXin Li size_t value) 180*b095b053SXin Li { 181*b095b053SXin Li atomic_store_explicit(address, value, memory_order_relaxed); 182*b095b053SXin Li } 183*b095b053SXin Li pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)184*b095b053SXin Li static inline void pthreadpool_store_relaxed_void_p( 185*b095b053SXin Li pthreadpool_atomic_void_p* address, 186*b095b053SXin Li void* value) 187*b095b053SXin Li { 188*b095b053SXin Li atomic_store_explicit(address, value, memory_order_relaxed); 189*b095b053SXin Li } 190*b095b053SXin Li pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)191*b095b053SXin Li static inline void pthreadpool_store_release_uint32_t( 192*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 193*b095b053SXin Li uint32_t value) 194*b095b053SXin Li { 195*b095b053SXin Li atomic_store_explicit(address, value, memory_order_release); 196*b095b053SXin Li } 197*b095b053SXin Li pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)198*b095b053SXin Li static inline void pthreadpool_store_release_size_t( 199*b095b053SXin Li pthreadpool_atomic_size_t* address, 200*b095b053SXin Li size_t value) 201*b095b053SXin Li { 202*b095b053SXin Li atomic_store_explicit(address, value, memory_order_release); 203*b095b053SXin Li } 204*b095b053SXin Li pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)205*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( 206*b095b053SXin Li pthreadpool_atomic_size_t* address) 207*b095b053SXin Li { 208*b095b053SXin Li return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1; 209*b095b053SXin Li } 210*b095b053SXin Li pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)211*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_release_size_t( 212*b095b053SXin Li pthreadpool_atomic_size_t* address) 213*b095b053SXin Li { 214*b095b053SXin Li return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1; 215*b095b053SXin Li } 216*b095b053SXin Li pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)217*b095b053SXin Li static inline bool pthreadpool_try_decrement_relaxed_size_t( 218*b095b053SXin Li pthreadpool_atomic_size_t* value) 219*b095b053SXin Li { 220*b095b053SXin Li #if defined(__clang__) && (defined(__arm__) || defined(__aarch64__)) 221*b095b053SXin Li size_t actual_value; 222*b095b053SXin Li do { 223*b095b053SXin Li actual_value = __builtin_arm_ldrex((const volatile size_t*) value); 224*b095b053SXin Li if (actual_value == 0) { 225*b095b053SXin Li __builtin_arm_clrex(); 226*b095b053SXin Li return false; 227*b095b053SXin Li } 228*b095b053SXin Li } while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0); 229*b095b053SXin Li return true; 230*b095b053SXin Li #else 231*b095b053SXin Li size_t actual_value = pthreadpool_load_relaxed_size_t(value); 232*b095b053SXin Li while (actual_value != 0) { 233*b095b053SXin Li if (atomic_compare_exchange_weak_explicit( 234*b095b053SXin Li value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed)) 235*b095b053SXin Li { 236*b095b053SXin Li return true; 237*b095b053SXin Li } 238*b095b053SXin Li } 239*b095b053SXin Li return false; 240*b095b053SXin Li #endif 241*b095b053SXin Li } 242*b095b053SXin Li pthreadpool_fence_acquire()243*b095b053SXin Li static inline void pthreadpool_fence_acquire() { 244*b095b053SXin Li atomic_thread_fence(memory_order_acquire); 245*b095b053SXin Li } 246*b095b053SXin Li pthreadpool_fence_release()247*b095b053SXin Li static inline void pthreadpool_fence_release() { 248*b095b053SXin Li atomic_thread_fence(memory_order_release); 249*b095b053SXin Li } 250*b095b053SXin Li #elif defined(__GNUC__) 251*b095b053SXin Li typedef uint32_t volatile pthreadpool_atomic_uint32_t; 252*b095b053SXin Li typedef size_t volatile pthreadpool_atomic_size_t; 253*b095b053SXin Li typedef void* volatile pthreadpool_atomic_void_p; 254*b095b053SXin Li pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)255*b095b053SXin Li static inline uint32_t pthreadpool_load_relaxed_uint32_t( 256*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 257*b095b053SXin Li { 258*b095b053SXin Li return *address; 259*b095b053SXin Li } 260*b095b053SXin Li pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)261*b095b053SXin Li static inline size_t pthreadpool_load_relaxed_size_t( 262*b095b053SXin Li pthreadpool_atomic_size_t* address) 263*b095b053SXin Li { 264*b095b053SXin Li return *address; 265*b095b053SXin Li } 266*b095b053SXin Li pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)267*b095b053SXin Li static inline void* pthreadpool_load_relaxed_void_p( 268*b095b053SXin Li pthreadpool_atomic_void_p* address) 269*b095b053SXin Li { 270*b095b053SXin Li return *address; 271*b095b053SXin Li } 272*b095b053SXin Li pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)273*b095b053SXin Li static inline uint32_t pthreadpool_load_acquire_uint32_t( 274*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 275*b095b053SXin Li { 276*b095b053SXin Li return *address; 277*b095b053SXin Li } 278*b095b053SXin Li pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)279*b095b053SXin Li static inline size_t pthreadpool_load_acquire_size_t( 280*b095b053SXin Li pthreadpool_atomic_size_t* address) 281*b095b053SXin Li { 282*b095b053SXin Li return *address; 283*b095b053SXin Li } 284*b095b053SXin Li pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)285*b095b053SXin Li static inline void pthreadpool_store_relaxed_uint32_t( 286*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 287*b095b053SXin Li uint32_t value) 288*b095b053SXin Li { 289*b095b053SXin Li *address = value; 290*b095b053SXin Li } 291*b095b053SXin Li pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)292*b095b053SXin Li static inline void pthreadpool_store_relaxed_size_t( 293*b095b053SXin Li pthreadpool_atomic_size_t* address, 294*b095b053SXin Li size_t value) 295*b095b053SXin Li { 296*b095b053SXin Li *address = value; 297*b095b053SXin Li } 298*b095b053SXin Li pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)299*b095b053SXin Li static inline void pthreadpool_store_relaxed_void_p( 300*b095b053SXin Li pthreadpool_atomic_void_p* address, 301*b095b053SXin Li void* value) 302*b095b053SXin Li { 303*b095b053SXin Li *address = value; 304*b095b053SXin Li } 305*b095b053SXin Li pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)306*b095b053SXin Li static inline void pthreadpool_store_release_uint32_t( 307*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 308*b095b053SXin Li uint32_t value) 309*b095b053SXin Li { 310*b095b053SXin Li *address = value; 311*b095b053SXin Li } 312*b095b053SXin Li pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)313*b095b053SXin Li static inline void pthreadpool_store_release_size_t( 314*b095b053SXin Li pthreadpool_atomic_size_t* address, 315*b095b053SXin Li size_t value) 316*b095b053SXin Li { 317*b095b053SXin Li *address = value; 318*b095b053SXin Li } 319*b095b053SXin Li pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)320*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( 321*b095b053SXin Li pthreadpool_atomic_size_t* address) 322*b095b053SXin Li { 323*b095b053SXin Li return __sync_sub_and_fetch(address, 1); 324*b095b053SXin Li } 325*b095b053SXin Li pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)326*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_release_size_t( 327*b095b053SXin Li pthreadpool_atomic_size_t* address) 328*b095b053SXin Li { 329*b095b053SXin Li return __sync_sub_and_fetch(address, 1); 330*b095b053SXin Li } 331*b095b053SXin Li pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)332*b095b053SXin Li static inline bool pthreadpool_try_decrement_relaxed_size_t( 333*b095b053SXin Li pthreadpool_atomic_size_t* value) 334*b095b053SXin Li { 335*b095b053SXin Li size_t actual_value = *value; 336*b095b053SXin Li while (actual_value != 0) { 337*b095b053SXin Li const size_t new_value = actual_value - 1; 338*b095b053SXin Li const size_t expected_value = actual_value; 339*b095b053SXin Li actual_value = __sync_val_compare_and_swap(value, expected_value, new_value); 340*b095b053SXin Li if (actual_value == expected_value) { 341*b095b053SXin Li return true; 342*b095b053SXin Li } 343*b095b053SXin Li } 344*b095b053SXin Li return false; 345*b095b053SXin Li } 346*b095b053SXin Li pthreadpool_fence_acquire()347*b095b053SXin Li static inline void pthreadpool_fence_acquire() { 348*b095b053SXin Li __sync_synchronize(); 349*b095b053SXin Li } 350*b095b053SXin Li pthreadpool_fence_release()351*b095b053SXin Li static inline void pthreadpool_fence_release() { 352*b095b053SXin Li __sync_synchronize(); 353*b095b053SXin Li } 354*b095b053SXin Li #elif defined(_MSC_VER) && defined(_M_X64) 355*b095b053SXin Li typedef volatile uint32_t pthreadpool_atomic_uint32_t; 356*b095b053SXin Li typedef volatile size_t pthreadpool_atomic_size_t; 357*b095b053SXin Li typedef void *volatile pthreadpool_atomic_void_p; 358*b095b053SXin Li pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)359*b095b053SXin Li static inline uint32_t pthreadpool_load_relaxed_uint32_t( 360*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 361*b095b053SXin Li { 362*b095b053SXin Li return *address; 363*b095b053SXin Li } 364*b095b053SXin Li pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)365*b095b053SXin Li static inline size_t pthreadpool_load_relaxed_size_t( 366*b095b053SXin Li pthreadpool_atomic_size_t* address) 367*b095b053SXin Li { 368*b095b053SXin Li return *address; 369*b095b053SXin Li } 370*b095b053SXin Li pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)371*b095b053SXin Li static inline void* pthreadpool_load_relaxed_void_p( 372*b095b053SXin Li pthreadpool_atomic_void_p* address) 373*b095b053SXin Li { 374*b095b053SXin Li return *address; 375*b095b053SXin Li } 376*b095b053SXin Li pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)377*b095b053SXin Li static inline uint32_t pthreadpool_load_acquire_uint32_t( 378*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 379*b095b053SXin Li { 380*b095b053SXin Li /* x86-64 loads always have acquire semantics; use only a compiler barrier */ 381*b095b053SXin Li const uint32_t value = *address; 382*b095b053SXin Li _ReadBarrier(); 383*b095b053SXin Li return value; 384*b095b053SXin Li } 385*b095b053SXin Li pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)386*b095b053SXin Li static inline size_t pthreadpool_load_acquire_size_t( 387*b095b053SXin Li pthreadpool_atomic_size_t* address) 388*b095b053SXin Li { 389*b095b053SXin Li /* x86-64 loads always have acquire semantics; use only a compiler barrier */ 390*b095b053SXin Li const size_t value = *address; 391*b095b053SXin Li _ReadBarrier(); 392*b095b053SXin Li return value; 393*b095b053SXin Li } 394*b095b053SXin Li pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)395*b095b053SXin Li static inline void pthreadpool_store_relaxed_uint32_t( 396*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 397*b095b053SXin Li uint32_t value) 398*b095b053SXin Li { 399*b095b053SXin Li *address = value; 400*b095b053SXin Li } 401*b095b053SXin Li pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)402*b095b053SXin Li static inline void pthreadpool_store_relaxed_size_t( 403*b095b053SXin Li pthreadpool_atomic_size_t* address, 404*b095b053SXin Li size_t value) 405*b095b053SXin Li { 406*b095b053SXin Li *address = value; 407*b095b053SXin Li } 408*b095b053SXin Li pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)409*b095b053SXin Li static inline void pthreadpool_store_relaxed_void_p( 410*b095b053SXin Li pthreadpool_atomic_void_p* address, 411*b095b053SXin Li void* value) 412*b095b053SXin Li { 413*b095b053SXin Li *address = value; 414*b095b053SXin Li } 415*b095b053SXin Li pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)416*b095b053SXin Li static inline void pthreadpool_store_release_uint32_t( 417*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 418*b095b053SXin Li uint32_t value) 419*b095b053SXin Li { 420*b095b053SXin Li /* x86-64 stores always have release semantics; use only a compiler barrier */ 421*b095b053SXin Li _WriteBarrier(); 422*b095b053SXin Li *address = value; 423*b095b053SXin Li } 424*b095b053SXin Li pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)425*b095b053SXin Li static inline void pthreadpool_store_release_size_t( 426*b095b053SXin Li pthreadpool_atomic_size_t* address, 427*b095b053SXin Li size_t value) 428*b095b053SXin Li { 429*b095b053SXin Li /* x86-64 stores always have release semantics; use only a compiler barrier */ 430*b095b053SXin Li _WriteBarrier(); 431*b095b053SXin Li *address = value; 432*b095b053SXin Li } 433*b095b053SXin Li pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)434*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( 435*b095b053SXin Li pthreadpool_atomic_size_t* address) 436*b095b053SXin Li { 437*b095b053SXin Li return (size_t) _InterlockedDecrement64((volatile __int64*) address); 438*b095b053SXin Li } 439*b095b053SXin Li pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)440*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_release_size_t( 441*b095b053SXin Li pthreadpool_atomic_size_t* address) 442*b095b053SXin Li { 443*b095b053SXin Li return (size_t) _InterlockedDecrement64((volatile __int64*) address); 444*b095b053SXin Li } 445*b095b053SXin Li pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)446*b095b053SXin Li static inline bool pthreadpool_try_decrement_relaxed_size_t( 447*b095b053SXin Li pthreadpool_atomic_size_t* value) 448*b095b053SXin Li { 449*b095b053SXin Li size_t actual_value = *value; 450*b095b053SXin Li while (actual_value != 0) { 451*b095b053SXin Li const size_t new_value = actual_value - 1; 452*b095b053SXin Li const size_t expected_value = actual_value; 453*b095b053SXin Li actual_value = _InterlockedCompareExchange64( 454*b095b053SXin Li (volatile __int64*) value, (__int64) new_value, (__int64) expected_value); 455*b095b053SXin Li if (actual_value == expected_value) { 456*b095b053SXin Li return true; 457*b095b053SXin Li } 458*b095b053SXin Li } 459*b095b053SXin Li return false; 460*b095b053SXin Li } 461*b095b053SXin Li pthreadpool_fence_acquire()462*b095b053SXin Li static inline void pthreadpool_fence_acquire() { 463*b095b053SXin Li _mm_lfence(); 464*b095b053SXin Li _ReadBarrier(); 465*b095b053SXin Li } 466*b095b053SXin Li pthreadpool_fence_release()467*b095b053SXin Li static inline void pthreadpool_fence_release() { 468*b095b053SXin Li _WriteBarrier(); 469*b095b053SXin Li _mm_sfence(); 470*b095b053SXin Li } 471*b095b053SXin Li #elif defined(_MSC_VER) && defined(_M_IX86) 472*b095b053SXin Li typedef volatile uint32_t pthreadpool_atomic_uint32_t; 473*b095b053SXin Li typedef volatile size_t pthreadpool_atomic_size_t; 474*b095b053SXin Li typedef void *volatile pthreadpool_atomic_void_p; 475*b095b053SXin Li pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)476*b095b053SXin Li static inline uint32_t pthreadpool_load_relaxed_uint32_t( 477*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 478*b095b053SXin Li { 479*b095b053SXin Li return *address; 480*b095b053SXin Li } 481*b095b053SXin Li pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)482*b095b053SXin Li static inline size_t pthreadpool_load_relaxed_size_t( 483*b095b053SXin Li pthreadpool_atomic_size_t* address) 484*b095b053SXin Li { 485*b095b053SXin Li return *address; 486*b095b053SXin Li } 487*b095b053SXin Li pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)488*b095b053SXin Li static inline void* pthreadpool_load_relaxed_void_p( 489*b095b053SXin Li pthreadpool_atomic_void_p* address) 490*b095b053SXin Li { 491*b095b053SXin Li return *address; 492*b095b053SXin Li } 493*b095b053SXin Li pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)494*b095b053SXin Li static inline uint32_t pthreadpool_load_acquire_uint32_t( 495*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 496*b095b053SXin Li { 497*b095b053SXin Li /* x86 loads always have acquire semantics; use only a compiler barrier */ 498*b095b053SXin Li const uint32_t value = *address; 499*b095b053SXin Li _ReadBarrier(); 500*b095b053SXin Li return value; 501*b095b053SXin Li } 502*b095b053SXin Li pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)503*b095b053SXin Li static inline size_t pthreadpool_load_acquire_size_t( 504*b095b053SXin Li pthreadpool_atomic_size_t* address) 505*b095b053SXin Li { 506*b095b053SXin Li /* x86 loads always have acquire semantics; use only a compiler barrier */ 507*b095b053SXin Li const size_t value = *address; 508*b095b053SXin Li _ReadBarrier(); 509*b095b053SXin Li return value; 510*b095b053SXin Li } 511*b095b053SXin Li pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)512*b095b053SXin Li static inline void pthreadpool_store_relaxed_uint32_t( 513*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 514*b095b053SXin Li uint32_t value) 515*b095b053SXin Li { 516*b095b053SXin Li *address = value; 517*b095b053SXin Li } 518*b095b053SXin Li pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)519*b095b053SXin Li static inline void pthreadpool_store_relaxed_size_t( 520*b095b053SXin Li pthreadpool_atomic_size_t* address, 521*b095b053SXin Li size_t value) 522*b095b053SXin Li { 523*b095b053SXin Li *address = value; 524*b095b053SXin Li } 525*b095b053SXin Li pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)526*b095b053SXin Li static inline void pthreadpool_store_relaxed_void_p( 527*b095b053SXin Li pthreadpool_atomic_void_p* address, 528*b095b053SXin Li void* value) 529*b095b053SXin Li { 530*b095b053SXin Li *address = value; 531*b095b053SXin Li } 532*b095b053SXin Li pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)533*b095b053SXin Li static inline void pthreadpool_store_release_uint32_t( 534*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 535*b095b053SXin Li uint32_t value) 536*b095b053SXin Li { 537*b095b053SXin Li /* x86 stores always have release semantics; use only a compiler barrier */ 538*b095b053SXin Li _WriteBarrier(); 539*b095b053SXin Li *address = value; 540*b095b053SXin Li } 541*b095b053SXin Li pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)542*b095b053SXin Li static inline void pthreadpool_store_release_size_t( 543*b095b053SXin Li pthreadpool_atomic_size_t* address, 544*b095b053SXin Li size_t value) 545*b095b053SXin Li { 546*b095b053SXin Li /* x86 stores always have release semantics; use only a compiler barrier */ 547*b095b053SXin Li _WriteBarrier(); 548*b095b053SXin Li *address = value; 549*b095b053SXin Li } 550*b095b053SXin Li pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)551*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( 552*b095b053SXin Li pthreadpool_atomic_size_t* address) 553*b095b053SXin Li { 554*b095b053SXin Li return (size_t) _InterlockedDecrement((volatile long*) address); 555*b095b053SXin Li } 556*b095b053SXin Li pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)557*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_release_size_t( 558*b095b053SXin Li pthreadpool_atomic_size_t* address) 559*b095b053SXin Li { 560*b095b053SXin Li return (size_t) _InterlockedDecrement((volatile long*) address); 561*b095b053SXin Li } 562*b095b053SXin Li pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)563*b095b053SXin Li static inline bool pthreadpool_try_decrement_relaxed_size_t( 564*b095b053SXin Li pthreadpool_atomic_size_t* value) 565*b095b053SXin Li { 566*b095b053SXin Li size_t actual_value = *value; 567*b095b053SXin Li while (actual_value != 0) { 568*b095b053SXin Li const size_t new_value = actual_value - 1; 569*b095b053SXin Li const size_t expected_value = actual_value; 570*b095b053SXin Li actual_value = _InterlockedCompareExchange( 571*b095b053SXin Li (volatile long*) value, (long) new_value, (long) expected_value); 572*b095b053SXin Li if (actual_value == expected_value) { 573*b095b053SXin Li return true; 574*b095b053SXin Li } 575*b095b053SXin Li } 576*b095b053SXin Li return false; 577*b095b053SXin Li } 578*b095b053SXin Li pthreadpool_fence_acquire()579*b095b053SXin Li static inline void pthreadpool_fence_acquire() { 580*b095b053SXin Li _mm_lfence(); 581*b095b053SXin Li } 582*b095b053SXin Li pthreadpool_fence_release()583*b095b053SXin Li static inline void pthreadpool_fence_release() { 584*b095b053SXin Li _mm_sfence(); 585*b095b053SXin Li } 586*b095b053SXin Li #elif defined(_MSC_VER) && defined(_M_ARM64) 587*b095b053SXin Li typedef volatile uint32_t pthreadpool_atomic_uint32_t; 588*b095b053SXin Li typedef volatile size_t pthreadpool_atomic_size_t; 589*b095b053SXin Li typedef void *volatile pthreadpool_atomic_void_p; 590*b095b053SXin Li pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)591*b095b053SXin Li static inline uint32_t pthreadpool_load_relaxed_uint32_t( 592*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 593*b095b053SXin Li { 594*b095b053SXin Li return (uint32_t) __iso_volatile_load32((const volatile __int32*) address); 595*b095b053SXin Li } 596*b095b053SXin Li pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)597*b095b053SXin Li static inline size_t pthreadpool_load_relaxed_size_t( 598*b095b053SXin Li pthreadpool_atomic_size_t* address) 599*b095b053SXin Li { 600*b095b053SXin Li return (size_t) __iso_volatile_load64((const volatile __int64*) address); 601*b095b053SXin Li } 602*b095b053SXin Li pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)603*b095b053SXin Li static inline void* pthreadpool_load_relaxed_void_p( 604*b095b053SXin Li pthreadpool_atomic_void_p* address) 605*b095b053SXin Li { 606*b095b053SXin Li return (void*) __iso_volatile_load64((const volatile __int64*) address); 607*b095b053SXin Li } 608*b095b053SXin Li pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)609*b095b053SXin Li static inline uint32_t pthreadpool_load_acquire_uint32_t( 610*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 611*b095b053SXin Li { 612*b095b053SXin Li return (uint32_t) __ldar32((volatile unsigned __int32*) address); 613*b095b053SXin Li } 614*b095b053SXin Li pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)615*b095b053SXin Li static inline size_t pthreadpool_load_acquire_size_t( 616*b095b053SXin Li pthreadpool_atomic_size_t* address) 617*b095b053SXin Li { 618*b095b053SXin Li return (size_t) __ldar64((volatile unsigned __int64*) address); 619*b095b053SXin Li } 620*b095b053SXin Li pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)621*b095b053SXin Li static inline void pthreadpool_store_relaxed_uint32_t( 622*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 623*b095b053SXin Li uint32_t value) 624*b095b053SXin Li { 625*b095b053SXin Li __iso_volatile_store32((volatile __int32*) address, (__int32) value); 626*b095b053SXin Li } 627*b095b053SXin Li pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)628*b095b053SXin Li static inline void pthreadpool_store_relaxed_size_t( 629*b095b053SXin Li pthreadpool_atomic_size_t* address, 630*b095b053SXin Li size_t value) 631*b095b053SXin Li { 632*b095b053SXin Li __iso_volatile_store64((volatile __int64*) address, (__int64) value); 633*b095b053SXin Li } 634*b095b053SXin Li pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)635*b095b053SXin Li static inline void pthreadpool_store_relaxed_void_p( 636*b095b053SXin Li pthreadpool_atomic_void_p* address, 637*b095b053SXin Li void* value) 638*b095b053SXin Li { 639*b095b053SXin Li __iso_volatile_store64((volatile __int64*) address, (__int64) value); 640*b095b053SXin Li } 641*b095b053SXin Li pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)642*b095b053SXin Li static inline void pthreadpool_store_release_uint32_t( 643*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 644*b095b053SXin Li uint32_t value) 645*b095b053SXin Li { 646*b095b053SXin Li _WriteBarrier(); 647*b095b053SXin Li __stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value); 648*b095b053SXin Li } 649*b095b053SXin Li pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)650*b095b053SXin Li static inline void pthreadpool_store_release_size_t( 651*b095b053SXin Li pthreadpool_atomic_size_t* address, 652*b095b053SXin Li size_t value) 653*b095b053SXin Li { 654*b095b053SXin Li _WriteBarrier(); 655*b095b053SXin Li __stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value); 656*b095b053SXin Li } 657*b095b053SXin Li pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)658*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( 659*b095b053SXin Li pthreadpool_atomic_size_t* address) 660*b095b053SXin Li { 661*b095b053SXin Li return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address); 662*b095b053SXin Li } 663*b095b053SXin Li pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)664*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_release_size_t( 665*b095b053SXin Li pthreadpool_atomic_size_t* address) 666*b095b053SXin Li { 667*b095b053SXin Li return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address); 668*b095b053SXin Li } 669*b095b053SXin Li pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)670*b095b053SXin Li static inline bool pthreadpool_try_decrement_relaxed_size_t( 671*b095b053SXin Li pthreadpool_atomic_size_t* value) 672*b095b053SXin Li { 673*b095b053SXin Li size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value); 674*b095b053SXin Li while (actual_value != 0) { 675*b095b053SXin Li const size_t new_value = actual_value - 1; 676*b095b053SXin Li const size_t expected_value = actual_value; 677*b095b053SXin Li actual_value = _InterlockedCompareExchange64_nf( 678*b095b053SXin Li (volatile __int64*) value, (__int64) new_value, (__int64) expected_value); 679*b095b053SXin Li if (actual_value == expected_value) { 680*b095b053SXin Li return true; 681*b095b053SXin Li } 682*b095b053SXin Li } 683*b095b053SXin Li return false; 684*b095b053SXin Li } 685*b095b053SXin Li pthreadpool_fence_acquire()686*b095b053SXin Li static inline void pthreadpool_fence_acquire() { 687*b095b053SXin Li __dmb(_ARM64_BARRIER_ISHLD); 688*b095b053SXin Li _ReadBarrier(); 689*b095b053SXin Li } 690*b095b053SXin Li pthreadpool_fence_release()691*b095b053SXin Li static inline void pthreadpool_fence_release() { 692*b095b053SXin Li _WriteBarrier(); 693*b095b053SXin Li __dmb(_ARM64_BARRIER_ISH); 694*b095b053SXin Li } 695*b095b053SXin Li #elif defined(_MSC_VER) && defined(_M_ARM) 696*b095b053SXin Li typedef volatile uint32_t pthreadpool_atomic_uint32_t; 697*b095b053SXin Li typedef volatile size_t pthreadpool_atomic_size_t; 698*b095b053SXin Li typedef void *volatile pthreadpool_atomic_void_p; 699*b095b053SXin Li pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)700*b095b053SXin Li static inline uint32_t pthreadpool_load_relaxed_uint32_t( 701*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 702*b095b053SXin Li { 703*b095b053SXin Li return (uint32_t) __iso_volatile_load32((const volatile __int32*) address); 704*b095b053SXin Li } 705*b095b053SXin Li pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)706*b095b053SXin Li static inline size_t pthreadpool_load_relaxed_size_t( 707*b095b053SXin Li pthreadpool_atomic_size_t* address) 708*b095b053SXin Li { 709*b095b053SXin Li return (size_t) __iso_volatile_load32((const volatile __int32*) address); 710*b095b053SXin Li } 711*b095b053SXin Li pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)712*b095b053SXin Li static inline void* pthreadpool_load_relaxed_void_p( 713*b095b053SXin Li pthreadpool_atomic_void_p* address) 714*b095b053SXin Li { 715*b095b053SXin Li return (void*) __iso_volatile_load32((const volatile __int32*) address); 716*b095b053SXin Li } 717*b095b053SXin Li pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)718*b095b053SXin Li static inline uint32_t pthreadpool_load_acquire_uint32_t( 719*b095b053SXin Li pthreadpool_atomic_uint32_t* address) 720*b095b053SXin Li { 721*b095b053SXin Li const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address); 722*b095b053SXin Li __dmb(_ARM_BARRIER_ISH); 723*b095b053SXin Li _ReadBarrier(); 724*b095b053SXin Li return value; 725*b095b053SXin Li } 726*b095b053SXin Li pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)727*b095b053SXin Li static inline size_t pthreadpool_load_acquire_size_t( 728*b095b053SXin Li pthreadpool_atomic_size_t* address) 729*b095b053SXin Li { 730*b095b053SXin Li const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address); 731*b095b053SXin Li __dmb(_ARM_BARRIER_ISH); 732*b095b053SXin Li _ReadBarrier(); 733*b095b053SXin Li return value; 734*b095b053SXin Li } 735*b095b053SXin Li pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)736*b095b053SXin Li static inline void pthreadpool_store_relaxed_uint32_t( 737*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 738*b095b053SXin Li uint32_t value) 739*b095b053SXin Li { 740*b095b053SXin Li __iso_volatile_store32((volatile __int32*) address, (__int32) value); 741*b095b053SXin Li } 742*b095b053SXin Li pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)743*b095b053SXin Li static inline void pthreadpool_store_relaxed_size_t( 744*b095b053SXin Li pthreadpool_atomic_size_t* address, 745*b095b053SXin Li size_t value) 746*b095b053SXin Li { 747*b095b053SXin Li __iso_volatile_store32((volatile __int32*) address, (__int32) value); 748*b095b053SXin Li } 749*b095b053SXin Li pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)750*b095b053SXin Li static inline void pthreadpool_store_relaxed_void_p( 751*b095b053SXin Li pthreadpool_atomic_void_p* address, 752*b095b053SXin Li void* value) 753*b095b053SXin Li { 754*b095b053SXin Li __iso_volatile_store32((volatile __int32*) address, (__int32) value); 755*b095b053SXin Li } 756*b095b053SXin Li pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)757*b095b053SXin Li static inline void pthreadpool_store_release_uint32_t( 758*b095b053SXin Li pthreadpool_atomic_uint32_t* address, 759*b095b053SXin Li uint32_t value) 760*b095b053SXin Li { 761*b095b053SXin Li _WriteBarrier(); 762*b095b053SXin Li __dmb(_ARM_BARRIER_ISH); 763*b095b053SXin Li __iso_volatile_store32((volatile __int32*) address, (__int32) value); 764*b095b053SXin Li } 765*b095b053SXin Li pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)766*b095b053SXin Li static inline void pthreadpool_store_release_size_t( 767*b095b053SXin Li pthreadpool_atomic_size_t* address, 768*b095b053SXin Li size_t value) 769*b095b053SXin Li { 770*b095b053SXin Li _WriteBarrier(); 771*b095b053SXin Li __dmb(_ARM_BARRIER_ISH); 772*b095b053SXin Li __iso_volatile_store32((volatile __int32*) address, (__int32) value); 773*b095b053SXin Li } 774*b095b053SXin Li pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)775*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( 776*b095b053SXin Li pthreadpool_atomic_size_t* address) 777*b095b053SXin Li { 778*b095b053SXin Li return (size_t) _InterlockedDecrement_nf((volatile long*) address); 779*b095b053SXin Li } 780*b095b053SXin Li pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)781*b095b053SXin Li static inline size_t pthreadpool_decrement_fetch_release_size_t( 782*b095b053SXin Li pthreadpool_atomic_size_t* address) 783*b095b053SXin Li { 784*b095b053SXin Li return (size_t) _InterlockedDecrement_rel((volatile long*) address); 785*b095b053SXin Li } 786*b095b053SXin Li pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)787*b095b053SXin Li static inline bool pthreadpool_try_decrement_relaxed_size_t( 788*b095b053SXin Li pthreadpool_atomic_size_t* value) 789*b095b053SXin Li { 790*b095b053SXin Li size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value); 791*b095b053SXin Li while (actual_value != 0) { 792*b095b053SXin Li const size_t new_value = actual_value - 1; 793*b095b053SXin Li const size_t expected_value = actual_value; 794*b095b053SXin Li actual_value = _InterlockedCompareExchange_nf( 795*b095b053SXin Li (volatile long*) value, (long) new_value, (long) expected_value); 796*b095b053SXin Li if (actual_value == expected_value) { 797*b095b053SXin Li return true; 798*b095b053SXin Li } 799*b095b053SXin Li } 800*b095b053SXin Li return false; 801*b095b053SXin Li } 802*b095b053SXin Li pthreadpool_fence_acquire()803*b095b053SXin Li static inline void pthreadpool_fence_acquire() { 804*b095b053SXin Li __dmb(_ARM_BARRIER_ISH); 805*b095b053SXin Li _ReadBarrier(); 806*b095b053SXin Li } 807*b095b053SXin Li pthreadpool_fence_release()808*b095b053SXin Li static inline void pthreadpool_fence_release() { 809*b095b053SXin Li _WriteBarrier(); 810*b095b053SXin Li __dmb(_ARM_BARRIER_ISH); 811*b095b053SXin Li } 812*b095b053SXin Li #else 813*b095b053SXin Li #error "Platform-specific implementation of threadpool-atomics.h required" 814*b095b053SXin Li #endif 815*b095b053SXin Li 816*b095b053SXin Li #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) pthreadpool_yield()817*b095b053SXin Li static inline void pthreadpool_yield() { 818*b095b053SXin Li _mm_pause(); 819*b095b053SXin Li } 820*b095b053SXin Li #elif defined(__ARM_ACLE) || defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) pthreadpool_yield()821*b095b053SXin Li static inline void pthreadpool_yield() { 822*b095b053SXin Li __yield(); 823*b095b053SXin Li } 824*b095b053SXin Li #elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) || (defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__)) pthreadpool_yield()825*b095b053SXin Li static inline void pthreadpool_yield() { 826*b095b053SXin Li __asm__ __volatile__("yield"); 827*b095b053SXin Li } 828*b095b053SXin Li #else pthreadpool_yield()829*b095b053SXin Li static inline void pthreadpool_yield() { 830*b095b053SXin Li pthreadpool_fence_acquire(); 831*b095b053SXin Li } 832*b095b053SXin Li #endif 833