1*b095b053SXin Li #pragma once 2*b095b053SXin Li 3*b095b053SXin Li /* Standard C headers */ 4*b095b053SXin Li #include <stddef.h> 5*b095b053SXin Li #include <stdint.h> 6*b095b053SXin Li 7*b095b053SXin Li /* Internal headers */ 8*b095b053SXin Li #include "threadpool-common.h" 9*b095b053SXin Li #include "threadpool-atomics.h" 10*b095b053SXin Li 11*b095b053SXin Li /* POSIX headers */ 12*b095b053SXin Li #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX 13*b095b053SXin Li #include <pthread.h> 14*b095b053SXin Li #endif 15*b095b053SXin Li 16*b095b053SXin Li /* Mach headers */ 17*b095b053SXin Li #if PTHREADPOOL_USE_GCD 18*b095b053SXin Li #include <dispatch/dispatch.h> 19*b095b053SXin Li #endif 20*b095b053SXin Li 21*b095b053SXin Li /* Windows headers */ 22*b095b053SXin Li #if PTHREADPOOL_USE_EVENT 23*b095b053SXin Li #include <windows.h> 24*b095b053SXin Li #endif 25*b095b053SXin Li 26*b095b053SXin Li /* Dependencies */ 27*b095b053SXin Li #include <fxdiv.h> 28*b095b053SXin Li 29*b095b053SXin Li /* Library header */ 30*b095b053SXin Li #include <pthreadpool.h> 31*b095b053SXin Li 32*b095b053SXin Li 33*b095b053SXin Li #define THREADPOOL_COMMAND_MASK UINT32_C(0x7FFFFFFF) 34*b095b053SXin Li 35*b095b053SXin Li enum threadpool_command { 36*b095b053SXin Li threadpool_command_init, 37*b095b053SXin Li threadpool_command_parallelize, 38*b095b053SXin Li threadpool_command_shutdown, 39*b095b053SXin Li }; 40*b095b053SXin Li 41*b095b053SXin Li struct PTHREADPOOL_CACHELINE_ALIGNED thread_info { 42*b095b053SXin Li /** 43*b095b053SXin Li * Index of the first element in the work range. 44*b095b053SXin Li * Before processing a new element the owning worker thread increments this value. 45*b095b053SXin Li */ 46*b095b053SXin Li pthreadpool_atomic_size_t range_start; 47*b095b053SXin Li /** 48*b095b053SXin Li * Index of the element after the last element of the work range. 49*b095b053SXin Li * Before processing a new element the stealing worker thread decrements this value. 
50*b095b053SXin Li */ 51*b095b053SXin Li pthreadpool_atomic_size_t range_end; 52*b095b053SXin Li /** 53*b095b053SXin Li * The number of elements in the work range. 54*b095b053SXin Li * Due to race conditions range_length <= range_end - range_start. 55*b095b053SXin Li * The owning worker thread must decrement this value before incrementing @a range_start. 56*b095b053SXin Li * The stealing worker thread must decrement this value before decrementing @a range_end. 57*b095b053SXin Li */ 58*b095b053SXin Li pthreadpool_atomic_size_t range_length; 59*b095b053SXin Li /** 60*b095b053SXin Li * Thread number in the 0..threads_count-1 range. 61*b095b053SXin Li */ 62*b095b053SXin Li size_t thread_number; 63*b095b053SXin Li /** 64*b095b053SXin Li * Thread pool which owns the thread. 65*b095b053SXin Li */ 66*b095b053SXin Li struct pthreadpool* threadpool; 67*b095b053SXin Li #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX 68*b095b053SXin Li /** 69*b095b053SXin Li * The pthread object corresponding to the thread. 70*b095b053SXin Li */ 71*b095b053SXin Li pthread_t thread_object; 72*b095b053SXin Li #endif 73*b095b053SXin Li #if PTHREADPOOL_USE_EVENT 74*b095b053SXin Li /** 75*b095b053SXin Li * The Windows thread handle corresponding to the thread. 76*b095b053SXin Li */ 77*b095b053SXin Li HANDLE thread_handle; 78*b095b053SXin Li #endif 79*b095b053SXin Li }; 80*b095b053SXin Li 81*b095b053SXin Li PTHREADPOOL_STATIC_ASSERT(sizeof(struct thread_info) % PTHREADPOOL_CACHELINE_SIZE == 0, 82*b095b053SXin Li "thread_info structure must occupy an integer number of cache lines (64 bytes)"); 83*b095b053SXin Li 84*b095b053SXin Li struct pthreadpool_1d_with_uarch_params { 85*b095b053SXin Li /** 86*b095b053SXin Li * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function. 
87*b095b053SXin Li */ 88*b095b053SXin Li uint32_t default_uarch_index; 89*b095b053SXin Li /** 90*b095b053SXin Li * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function. 91*b095b053SXin Li */ 92*b095b053SXin Li uint32_t max_uarch_index; 93*b095b053SXin Li }; 94*b095b053SXin Li 95*b095b053SXin Li struct pthreadpool_1d_tile_1d_params { 96*b095b053SXin Li /** 97*b095b053SXin Li * Copy of the range argument passed to the pthreadpool_parallelize_1d_tile_1d function. 98*b095b053SXin Li */ 99*b095b053SXin Li size_t range; 100*b095b053SXin Li /** 101*b095b053SXin Li * Copy of the tile argument passed to the pthreadpool_parallelize_1d_tile_1d function. 102*b095b053SXin Li */ 103*b095b053SXin Li size_t tile; 104*b095b053SXin Li }; 105*b095b053SXin Li 106*b095b053SXin Li struct pthreadpool_2d_params { 107*b095b053SXin Li /** 108*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_2d function. 109*b095b053SXin Li */ 110*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 111*b095b053SXin Li }; 112*b095b053SXin Li 113*b095b053SXin Li struct pthreadpool_2d_tile_1d_params { 114*b095b053SXin Li /** 115*b095b053SXin Li * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_1d function. 116*b095b053SXin Li */ 117*b095b053SXin Li size_t range_j; 118*b095b053SXin Li /** 119*b095b053SXin Li * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_1d function. 120*b095b053SXin Li */ 121*b095b053SXin Li size_t tile_j; 122*b095b053SXin Li /** 123*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 124*b095b053SXin Li */ 125*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_j; 126*b095b053SXin Li }; 127*b095b053SXin Li 128*b095b053SXin Li struct pthreadpool_2d_tile_2d_params { 129*b095b053SXin Li /** 130*b095b053SXin Li * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d function. 
131*b095b053SXin Li */ 132*b095b053SXin Li size_t range_i; 133*b095b053SXin Li /** 134*b095b053SXin Li * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d function. 135*b095b053SXin Li */ 136*b095b053SXin Li size_t tile_i; 137*b095b053SXin Li /** 138*b095b053SXin Li * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d function. 139*b095b053SXin Li */ 140*b095b053SXin Li size_t range_j; 141*b095b053SXin Li /** 142*b095b053SXin Li * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d function. 143*b095b053SXin Li */ 144*b095b053SXin Li size_t tile_j; 145*b095b053SXin Li /** 146*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 147*b095b053SXin Li */ 148*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_j; 149*b095b053SXin Li }; 150*b095b053SXin Li 151*b095b053SXin Li struct pthreadpool_2d_tile_2d_with_uarch_params { 152*b095b053SXin Li /** 153*b095b053SXin Li * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 154*b095b053SXin Li */ 155*b095b053SXin Li uint32_t default_uarch_index; 156*b095b053SXin Li /** 157*b095b053SXin Li * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 158*b095b053SXin Li */ 159*b095b053SXin Li uint32_t max_uarch_index; 160*b095b053SXin Li /** 161*b095b053SXin Li * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 162*b095b053SXin Li */ 163*b095b053SXin Li size_t range_i; 164*b095b053SXin Li /** 165*b095b053SXin Li * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 166*b095b053SXin Li */ 167*b095b053SXin Li size_t tile_i; 168*b095b053SXin Li /** 169*b095b053SXin Li * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 
170*b095b053SXin Li */ 171*b095b053SXin Li size_t range_j; 172*b095b053SXin Li /** 173*b095b053SXin Li * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. 174*b095b053SXin Li */ 175*b095b053SXin Li size_t tile_j; 176*b095b053SXin Li /** 177*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 178*b095b053SXin Li */ 179*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_j; 180*b095b053SXin Li }; 181*b095b053SXin Li 182*b095b053SXin Li struct pthreadpool_3d_params { 183*b095b053SXin Li /** 184*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d function. 185*b095b053SXin Li */ 186*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 187*b095b053SXin Li /** 188*b095b053SXin Li * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_3d function. 189*b095b053SXin Li */ 190*b095b053SXin Li struct fxdiv_divisor_size_t range_k; 191*b095b053SXin Li }; 192*b095b053SXin Li 193*b095b053SXin Li struct pthreadpool_3d_tile_1d_params { 194*b095b053SXin Li /** 195*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_1d function. 196*b095b053SXin Li */ 197*b095b053SXin Li size_t range_k; 198*b095b053SXin Li /** 199*b095b053SXin Li * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_1d function. 200*b095b053SXin Li */ 201*b095b053SXin Li size_t tile_k; 202*b095b053SXin Li /** 203*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d_tile_1d function. 204*b095b053SXin Li */ 205*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 206*b095b053SXin Li /** 207*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_k, tile_k) value. 
208*b095b053SXin Li */ 209*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_k; 210*b095b053SXin Li }; 211*b095b053SXin Li 212*b095b053SXin Li struct pthreadpool_3d_tile_2d_params { 213*b095b053SXin Li /** 214*b095b053SXin Li * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d function. 215*b095b053SXin Li */ 216*b095b053SXin Li size_t range_j; 217*b095b053SXin Li /** 218*b095b053SXin Li * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d function. 219*b095b053SXin Li */ 220*b095b053SXin Li size_t tile_j; 221*b095b053SXin Li /** 222*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d function. 223*b095b053SXin Li */ 224*b095b053SXin Li size_t range_k; 225*b095b053SXin Li /** 226*b095b053SXin Li * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d function. 227*b095b053SXin Li */ 228*b095b053SXin Li size_t tile_k; 229*b095b053SXin Li /** 230*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 231*b095b053SXin Li */ 232*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_j; 233*b095b053SXin Li /** 234*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_k, tile_k) value. 235*b095b053SXin Li */ 236*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_k; 237*b095b053SXin Li }; 238*b095b053SXin Li 239*b095b053SXin Li struct pthreadpool_3d_tile_2d_with_uarch_params { 240*b095b053SXin Li /** 241*b095b053SXin Li * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 242*b095b053SXin Li */ 243*b095b053SXin Li uint32_t default_uarch_index; 244*b095b053SXin Li /** 245*b095b053SXin Li * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 
246*b095b053SXin Li */ 247*b095b053SXin Li uint32_t max_uarch_index; 248*b095b053SXin Li /** 249*b095b053SXin Li * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 250*b095b053SXin Li */ 251*b095b053SXin Li size_t range_j; 252*b095b053SXin Li /** 253*b095b053SXin Li * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 254*b095b053SXin Li */ 255*b095b053SXin Li size_t tile_j; 256*b095b053SXin Li /** 257*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 258*b095b053SXin Li */ 259*b095b053SXin Li size_t range_k; 260*b095b053SXin Li /** 261*b095b053SXin Li * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. 262*b095b053SXin Li */ 263*b095b053SXin Li size_t tile_k; 264*b095b053SXin Li /** 265*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_j, tile_j) value. 266*b095b053SXin Li */ 267*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_j; 268*b095b053SXin Li /** 269*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_k, tile_k) value. 270*b095b053SXin Li */ 271*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_k; 272*b095b053SXin Li }; 273*b095b053SXin Li 274*b095b053SXin Li struct pthreadpool_4d_params { 275*b095b053SXin Li /** 276*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_4d function. 277*b095b053SXin Li */ 278*b095b053SXin Li size_t range_k; 279*b095b053SXin Li /** 280*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d function. 281*b095b053SXin Li */ 282*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 283*b095b053SXin Li /** 284*b095b053SXin Li * FXdiv divisor for the range_k * range_l value. 
285*b095b053SXin Li */ 286*b095b053SXin Li struct fxdiv_divisor_size_t range_kl; 287*b095b053SXin Li /** 288*b095b053SXin Li * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_4d function. 289*b095b053SXin Li */ 290*b095b053SXin Li struct fxdiv_divisor_size_t range_l; 291*b095b053SXin Li }; 292*b095b053SXin Li 293*b095b053SXin Li struct pthreadpool_4d_tile_1d_params { 294*b095b053SXin Li /** 295*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_1d function. 296*b095b053SXin Li */ 297*b095b053SXin Li size_t range_k; 298*b095b053SXin Li /** 299*b095b053SXin Li * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_1d function. 300*b095b053SXin Li */ 301*b095b053SXin Li size_t range_l; 302*b095b053SXin Li /** 303*b095b053SXin Li * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_1d function. 304*b095b053SXin Li */ 305*b095b053SXin Li size_t tile_l; 306*b095b053SXin Li /** 307*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_1d function. 308*b095b053SXin Li */ 309*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 310*b095b053SXin Li /** 311*b095b053SXin Li * FXdiv divisor for the range_k * divide_round_up(range_l, tile_l) value. 312*b095b053SXin Li */ 313*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_kl; 314*b095b053SXin Li /** 315*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_l, tile_l) value. 316*b095b053SXin Li */ 317*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_l; 318*b095b053SXin Li }; 319*b095b053SXin Li 320*b095b053SXin Li struct pthreadpool_4d_tile_2d_params { 321*b095b053SXin Li /** 322*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d function. 
323*b095b053SXin Li */ 324*b095b053SXin Li size_t range_k; 325*b095b053SXin Li /** 326*b095b053SXin Li * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d function. 327*b095b053SXin Li */ 328*b095b053SXin Li size_t tile_k; 329*b095b053SXin Li /** 330*b095b053SXin Li * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d function. 331*b095b053SXin Li */ 332*b095b053SXin Li size_t range_l; 333*b095b053SXin Li /** 334*b095b053SXin Li * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d function. 335*b095b053SXin Li */ 336*b095b053SXin Li size_t tile_l; 337*b095b053SXin Li /** 338*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d function. 339*b095b053SXin Li */ 340*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 341*b095b053SXin Li /** 342*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value. 343*b095b053SXin Li */ 344*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_kl; 345*b095b053SXin Li /** 346*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_l, tile_l) value. 347*b095b053SXin Li */ 348*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_l; 349*b095b053SXin Li }; 350*b095b053SXin Li 351*b095b053SXin Li struct pthreadpool_4d_tile_2d_with_uarch_params { 352*b095b053SXin Li /** 353*b095b053SXin Li * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 354*b095b053SXin Li */ 355*b095b053SXin Li uint32_t default_uarch_index; 356*b095b053SXin Li /** 357*b095b053SXin Li * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 358*b095b053SXin Li */ 359*b095b053SXin Li uint32_t max_uarch_index; 360*b095b053SXin Li /** 361*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 
362*b095b053SXin Li */ 363*b095b053SXin Li size_t range_k; 364*b095b053SXin Li /** 365*b095b053SXin Li * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 366*b095b053SXin Li */ 367*b095b053SXin Li size_t tile_k; 368*b095b053SXin Li /** 369*b095b053SXin Li * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 370*b095b053SXin Li */ 371*b095b053SXin Li size_t range_l; 372*b095b053SXin Li /** 373*b095b053SXin Li * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 374*b095b053SXin Li */ 375*b095b053SXin Li size_t tile_l; 376*b095b053SXin Li /** 377*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. 378*b095b053SXin Li */ 379*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 380*b095b053SXin Li /** 381*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value. 382*b095b053SXin Li */ 383*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_kl; 384*b095b053SXin Li /** 385*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_l, tile_l) value. 386*b095b053SXin Li */ 387*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_l; 388*b095b053SXin Li }; 389*b095b053SXin Li 390*b095b053SXin Li struct pthreadpool_5d_params { 391*b095b053SXin Li /** 392*b095b053SXin Li * Copy of the range_l argument passed to the pthreadpool_parallelize_5d function. 393*b095b053SXin Li */ 394*b095b053SXin Li size_t range_l; 395*b095b053SXin Li /** 396*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d function. 397*b095b053SXin Li */ 398*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 399*b095b053SXin Li /** 400*b095b053SXin Li * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d function. 
401*b095b053SXin Li */ 402*b095b053SXin Li struct fxdiv_divisor_size_t range_k; 403*b095b053SXin Li /** 404*b095b053SXin Li * FXdiv divisor for the range_l * range_m value. 405*b095b053SXin Li */ 406*b095b053SXin Li struct fxdiv_divisor_size_t range_lm; 407*b095b053SXin Li /** 408*b095b053SXin Li * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_5d function. 409*b095b053SXin Li */ 410*b095b053SXin Li struct fxdiv_divisor_size_t range_m; 411*b095b053SXin Li }; 412*b095b053SXin Li 413*b095b053SXin Li struct pthreadpool_5d_tile_1d_params { 414*b095b053SXin Li /** 415*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_5d_tile_1d function. 416*b095b053SXin Li */ 417*b095b053SXin Li size_t range_k; 418*b095b053SXin Li /** 419*b095b053SXin Li * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_1d function. 420*b095b053SXin Li */ 421*b095b053SXin Li size_t range_m; 422*b095b053SXin Li /** 423*b095b053SXin Li * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_1d function. 424*b095b053SXin Li */ 425*b095b053SXin Li size_t tile_m; 426*b095b053SXin Li /** 427*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_1d function. 428*b095b053SXin Li */ 429*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 430*b095b053SXin Li /** 431*b095b053SXin Li * FXdiv divisor for the range_k * range_l value. 432*b095b053SXin Li */ 433*b095b053SXin Li struct fxdiv_divisor_size_t range_kl; 434*b095b053SXin Li /** 435*b095b053SXin Li * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_5d_tile_1d function. 436*b095b053SXin Li */ 437*b095b053SXin Li struct fxdiv_divisor_size_t range_l; 438*b095b053SXin Li /** 439*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_m, tile_m) value. 
440*b095b053SXin Li */ 441*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_m; 442*b095b053SXin Li }; 443*b095b053SXin Li 444*b095b053SXin Li struct pthreadpool_5d_tile_2d_params { 445*b095b053SXin Li /** 446*b095b053SXin Li * Copy of the range_l argument passed to the pthreadpool_parallelize_5d_tile_2d function. 447*b095b053SXin Li */ 448*b095b053SXin Li size_t range_l; 449*b095b053SXin Li /** 450*b095b053SXin Li * Copy of the tile_l argument passed to the pthreadpool_parallelize_5d_tile_2d function. 451*b095b053SXin Li */ 452*b095b053SXin Li size_t tile_l; 453*b095b053SXin Li /** 454*b095b053SXin Li * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_2d function. 455*b095b053SXin Li */ 456*b095b053SXin Li size_t range_m; 457*b095b053SXin Li /** 458*b095b053SXin Li * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_2d function. 459*b095b053SXin Li */ 460*b095b053SXin Li size_t tile_m; 461*b095b053SXin Li /** 462*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_2d function. 463*b095b053SXin Li */ 464*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 465*b095b053SXin Li /** 466*b095b053SXin Li * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d_tile_2d function. 467*b095b053SXin Li */ 468*b095b053SXin Li struct fxdiv_divisor_size_t range_k; 469*b095b053SXin Li /** 470*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_l, tile_l) * divide_round_up(range_m, tile_m) value. 471*b095b053SXin Li */ 472*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_lm; 473*b095b053SXin Li /** 474*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_m, tile_m) value. 
475*b095b053SXin Li */ 476*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_m; 477*b095b053SXin Li }; 478*b095b053SXin Li 479*b095b053SXin Li struct pthreadpool_6d_params { 480*b095b053SXin Li /** 481*b095b053SXin Li * Copy of the range_l argument passed to the pthreadpool_parallelize_6d function. 482*b095b053SXin Li */ 483*b095b053SXin Li size_t range_l; 484*b095b053SXin Li /** 485*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d function. 486*b095b053SXin Li */ 487*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 488*b095b053SXin Li /** 489*b095b053SXin Li * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d function. 490*b095b053SXin Li */ 491*b095b053SXin Li struct fxdiv_divisor_size_t range_k; 492*b095b053SXin Li /** 493*b095b053SXin Li * FXdiv divisor for the range_l * range_m * range_n value. 494*b095b053SXin Li */ 495*b095b053SXin Li struct fxdiv_divisor_size_t range_lmn; 496*b095b053SXin Li /** 497*b095b053SXin Li * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d function. 498*b095b053SXin Li */ 499*b095b053SXin Li struct fxdiv_divisor_size_t range_m; 500*b095b053SXin Li /** 501*b095b053SXin Li * FXdiv divisor for the range_n argument passed to the pthreadpool_parallelize_6d function. 502*b095b053SXin Li */ 503*b095b053SXin Li struct fxdiv_divisor_size_t range_n; 504*b095b053SXin Li }; 505*b095b053SXin Li 506*b095b053SXin Li struct pthreadpool_6d_tile_1d_params { 507*b095b053SXin Li /** 508*b095b053SXin Li * Copy of the range_l argument passed to the pthreadpool_parallelize_6d_tile_1d function. 509*b095b053SXin Li */ 510*b095b053SXin Li size_t range_l; 511*b095b053SXin Li /** 512*b095b053SXin Li * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_1d function. 
513*b095b053SXin Li */ 514*b095b053SXin Li size_t range_n; 515*b095b053SXin Li /** 516*b095b053SXin Li * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_1d function. 517*b095b053SXin Li */ 518*b095b053SXin Li size_t tile_n; 519*b095b053SXin Li /** 520*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_1d function. 521*b095b053SXin Li */ 522*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 523*b095b053SXin Li /** 524*b095b053SXin Li * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d_tile_1d function. 525*b095b053SXin Li */ 526*b095b053SXin Li struct fxdiv_divisor_size_t range_k; 527*b095b053SXin Li /** 528*b095b053SXin Li * FXdiv divisor for the range_l * range_m * divide_round_up(range_n, tile_n) value. 529*b095b053SXin Li */ 530*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_lmn; 531*b095b053SXin Li /** 532*b095b053SXin Li * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d_tile_1d function. 533*b095b053SXin Li */ 534*b095b053SXin Li struct fxdiv_divisor_size_t range_m; 535*b095b053SXin Li /** 536*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_n, tile_n) value. 537*b095b053SXin Li */ 538*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_n; 539*b095b053SXin Li }; 540*b095b053SXin Li 541*b095b053SXin Li struct pthreadpool_6d_tile_2d_params { 542*b095b053SXin Li /** 543*b095b053SXin Li * Copy of the range_k argument passed to the pthreadpool_parallelize_6d_tile_2d function. 544*b095b053SXin Li */ 545*b095b053SXin Li size_t range_k; 546*b095b053SXin Li /** 547*b095b053SXin Li * Copy of the range_m argument passed to the pthreadpool_parallelize_6d_tile_2d function. 548*b095b053SXin Li */ 549*b095b053SXin Li size_t range_m; 550*b095b053SXin Li /** 551*b095b053SXin Li * Copy of the tile_m argument passed to the pthreadpool_parallelize_6d_tile_2d function. 
552*b095b053SXin Li */ 553*b095b053SXin Li size_t tile_m; 554*b095b053SXin Li /** 555*b095b053SXin Li * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_2d function. 556*b095b053SXin Li */ 557*b095b053SXin Li size_t range_n; 558*b095b053SXin Li /** 559*b095b053SXin Li * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_2d function. 560*b095b053SXin Li */ 561*b095b053SXin Li size_t tile_n; 562*b095b053SXin Li /** 563*b095b053SXin Li * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_2d function. 564*b095b053SXin Li */ 565*b095b053SXin Li struct fxdiv_divisor_size_t range_j; 566*b095b053SXin Li /** 567*b095b053SXin Li * FXdiv divisor for the range_k * range_l value. 568*b095b053SXin Li */ 569*b095b053SXin Li struct fxdiv_divisor_size_t range_kl; 570*b095b053SXin Li /** 571*b095b053SXin Li * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_6d_tile_2d function. 572*b095b053SXin Li */ 573*b095b053SXin Li struct fxdiv_divisor_size_t range_l; 574*b095b053SXin Li /** 575*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_m, tile_m) * divide_round_up(range_n, tile_n) value. 576*b095b053SXin Li */ 577*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_mn; 578*b095b053SXin Li /** 579*b095b053SXin Li * FXdiv divisor for the divide_round_up(range_n, tile_n) value. 580*b095b053SXin Li */ 581*b095b053SXin Li struct fxdiv_divisor_size_t tile_range_n; 582*b095b053SXin Li }; 583*b095b053SXin Li 584*b095b053SXin Li struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool { 585*b095b053SXin Li #if !PTHREADPOOL_USE_GCD 586*b095b053SXin Li /** 587*b095b053SXin Li * The number of threads that are processing an operation. 588*b095b053SXin Li */ 589*b095b053SXin Li pthreadpool_atomic_size_t active_threads; 590*b095b053SXin Li #endif 591*b095b053SXin Li #if PTHREADPOOL_USE_FUTEX 592*b095b053SXin Li /** 593*b095b053SXin Li * Indicates if there are active threads. 
594*b095b053SXin Li * Only two values are possible: 595*b095b053SXin Li * - has_active_threads == 0 if active_threads == 0 596*b095b053SXin Li * - has_active_threads == 1 if active_threads != 0 597*b095b053SXin Li */ 598*b095b053SXin Li pthreadpool_atomic_uint32_t has_active_threads; 599*b095b053SXin Li #endif 600*b095b053SXin Li #if !PTHREADPOOL_USE_GCD 601*b095b053SXin Li /** 602*b095b053SXin Li * The last command submitted to the thread pool. 603*b095b053SXin Li */ 604*b095b053SXin Li pthreadpool_atomic_uint32_t command; 605*b095b053SXin Li #endif 606*b095b053SXin Li /** 607*b095b053SXin Li * The entry point function to call for each thread in the thread pool for parallelization tasks. 608*b095b053SXin Li */ 609*b095b053SXin Li pthreadpool_atomic_void_p thread_function; 610*b095b053SXin Li /** 611*b095b053SXin Li * The function to call for each item. 612*b095b053SXin Li */ 613*b095b053SXin Li pthreadpool_atomic_void_p task; 614*b095b053SXin Li /** 615*b095b053SXin Li * The first argument to the item processing function. 616*b095b053SXin Li */ 617*b095b053SXin Li pthreadpool_atomic_void_p argument; 618*b095b053SXin Li /** 619*b095b053SXin Li * Additional parallelization parameters. 620*b095b053SXin Li * These parameters are specific for each thread_function. 
621*b095b053SXin Li */ 622*b095b053SXin Li union { 623*b095b053SXin Li struct pthreadpool_1d_with_uarch_params parallelize_1d_with_uarch; 624*b095b053SXin Li struct pthreadpool_1d_tile_1d_params parallelize_1d_tile_1d; 625*b095b053SXin Li struct pthreadpool_2d_params parallelize_2d; 626*b095b053SXin Li struct pthreadpool_2d_tile_1d_params parallelize_2d_tile_1d; 627*b095b053SXin Li struct pthreadpool_2d_tile_2d_params parallelize_2d_tile_2d; 628*b095b053SXin Li struct pthreadpool_2d_tile_2d_with_uarch_params parallelize_2d_tile_2d_with_uarch; 629*b095b053SXin Li struct pthreadpool_3d_params parallelize_3d; 630*b095b053SXin Li struct pthreadpool_3d_tile_1d_params parallelize_3d_tile_1d; 631*b095b053SXin Li struct pthreadpool_3d_tile_2d_params parallelize_3d_tile_2d; 632*b095b053SXin Li struct pthreadpool_3d_tile_2d_with_uarch_params parallelize_3d_tile_2d_with_uarch; 633*b095b053SXin Li struct pthreadpool_4d_params parallelize_4d; 634*b095b053SXin Li struct pthreadpool_4d_tile_1d_params parallelize_4d_tile_1d; 635*b095b053SXin Li struct pthreadpool_4d_tile_2d_params parallelize_4d_tile_2d; 636*b095b053SXin Li struct pthreadpool_4d_tile_2d_with_uarch_params parallelize_4d_tile_2d_with_uarch; 637*b095b053SXin Li struct pthreadpool_5d_params parallelize_5d; 638*b095b053SXin Li struct pthreadpool_5d_tile_1d_params parallelize_5d_tile_1d; 639*b095b053SXin Li struct pthreadpool_5d_tile_2d_params parallelize_5d_tile_2d; 640*b095b053SXin Li struct pthreadpool_6d_params parallelize_6d; 641*b095b053SXin Li struct pthreadpool_6d_tile_1d_params parallelize_6d_tile_1d; 642*b095b053SXin Li struct pthreadpool_6d_tile_2d_params parallelize_6d_tile_2d; 643*b095b053SXin Li } params; 644*b095b053SXin Li /** 645*b095b053SXin Li * Copy of the flags passed to a parallelization function. 
646*b095b053SXin Li */ 647*b095b053SXin Li pthreadpool_atomic_uint32_t flags; 648*b095b053SXin Li #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX 649*b095b053SXin Li /** 650*b095b053SXin Li * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. 651*b095b053SXin Li */ 652*b095b053SXin Li pthread_mutex_t execution_mutex; 653*b095b053SXin Li #endif 654*b095b053SXin Li #if PTHREADPOOL_USE_GCD 655*b095b053SXin Li /** 656*b095b053SXin Li * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. 657*b095b053SXin Li */ 658*b095b053SXin Li dispatch_semaphore_t execution_semaphore; 659*b095b053SXin Li #endif 660*b095b053SXin Li #if PTHREADPOOL_USE_EVENT 661*b095b053SXin Li /** 662*b095b053SXin Li * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. 663*b095b053SXin Li */ 664*b095b053SXin Li HANDLE execution_mutex; 665*b095b053SXin Li #endif 666*b095b053SXin Li #if PTHREADPOOL_USE_CONDVAR 667*b095b053SXin Li /** 668*b095b053SXin Li * Guards access to the @a active_threads variable. 669*b095b053SXin Li */ 670*b095b053SXin Li pthread_mutex_t completion_mutex; 671*b095b053SXin Li /** 672*b095b053SXin Li * Condition variable to wait until all threads complete an operation (until @a active_threads is zero). 673*b095b053SXin Li */ 674*b095b053SXin Li pthread_cond_t completion_condvar; 675*b095b053SXin Li /** 676*b095b053SXin Li * Guards access to the @a command variable. 677*b095b053SXin Li */ 678*b095b053SXin Li pthread_mutex_t command_mutex; 679*b095b053SXin Li /** 680*b095b053SXin Li * Condition variable to wait for change of the @a command variable. 681*b095b053SXin Li */ 682*b095b053SXin Li pthread_cond_t command_condvar; 683*b095b053SXin Li #endif 684*b095b053SXin Li #if PTHREADPOOL_USE_EVENT 685*b095b053SXin Li /** 686*b095b053SXin Li * Events to wait on until all threads complete an operation (until @a active_threads is zero). 
687*b095b053SXin Li * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every 688*b095b053SXin Li * submitted command according to the high bit of the command word. 689*b095b053SXin Li */ 690*b095b053SXin Li HANDLE completion_event[2]; 691*b095b053SXin Li /** 692*b095b053SXin Li * Events to wait on for change of the @a command variable. 693*b095b053SXin Li * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every 694*b095b053SXin Li * submitted command according to the high bit of the command word. 695*b095b053SXin Li */ 696*b095b053SXin Li HANDLE command_event[2]; 697*b095b053SXin Li #endif 698*b095b053SXin Li /** 699*b095b053SXin Li * FXdiv divisor for the number of threads in the thread pool. 700*b095b053SXin Li * This struct never change after pthreadpool_create. 701*b095b053SXin Li */ 702*b095b053SXin Li struct fxdiv_divisor_size_t threads_count; 703*b095b053SXin Li /** 704*b095b053SXin Li * Thread information structures that immediately follow this structure. 
705*b095b053SXin Li */ 706*b095b053SXin Li struct thread_info threads[]; 707*b095b053SXin Li }; 708*b095b053SXin Li 709*b095b053SXin Li /* Compile-time check that sizeof(struct pthreadpool) is a whole multiple of PTHREADPOOL_CACHELINE_SIZE. NOTE(review): presumably this keeps the trailing threads[] entries, whose type is declared PTHREADPOOL_CACHELINE_ALIGNED, on cache-line boundaries - confirm. */ PTHREADPOOL_STATIC_ASSERT(sizeof(struct pthreadpool) % PTHREADPOOL_CACHELINE_SIZE == 0, 710*b095b053SXin Li "pthreadpool structure must occupy an integer number of cache lines (64 bytes)"); 711*b095b053SXin Li 712*b095b053SXin Li /** Internal allocator for a thread pool object (declaration only; the definition is not in this header). @param threads_count thread count; NOTE(review): presumably the number of thread_info entries reserved in the trailing threads[] array - confirm against the definition. @return pointer to a struct pthreadpool; the failure result (e.g. NULL) is not visible from here - confirm. */ PTHREADPOOL_INTERNAL struct pthreadpool* pthreadpool_allocate( 713*b095b053SXin Li size_t threads_count); 714*b095b053SXin Li 715*b095b053SXin Li /** Internal counterpart of pthreadpool_allocate (declaration only). @param threadpool thread pool object to dispose of; NOTE(review): presumably must have been obtained from pthreadpool_allocate - confirm. */ PTHREADPOOL_INTERNAL void pthreadpool_deallocate( 716*b095b053SXin Li struct pthreadpool* threadpool); 717*b095b053SXin Li 718*b095b053SXin Li /** Pointer to a per-thread routine that takes a thread pool and one of its thread_info entries and returns nothing. This is the signature of every _fastpath function declared below and the type of the thread_function argument of pthreadpool_parallelize. NOTE(review): presumably the thread argument is the entry of the worker executing the routine - confirm. */ typedef void (*thread_function_t)(struct pthreadpool* threadpool, struct thread_info* thread); 719*b095b053SXin Li 720*b095b053SXin Li /** Common internal entry point that takes a per-thread routine together with an untyped parameter block (declaration only). @param threadpool thread pool object. @param thread_function per-thread routine of type thread_function_t. @param params read-only untyped parameter block; NOTE(review): presumably copied into the params union of struct pthreadpool - confirm. @param params_size size associated with params; presumably in bytes - confirm. @param task untyped pointer; presumably the user callback - confirm. @param context untyped pointer; presumably the argument forwarded to the callback - confirm. @param linear_range size_t count; presumably the total number of work items to distribute - confirm. @param flags 32-bit flags word; struct pthreadpool documents its flags field as a copy of the flags passed to a parallelization function. */ PTHREADPOOL_INTERNAL void pthreadpool_parallelize( 721*b095b053SXin Li struct pthreadpool* threadpool, 722*b095b053SXin Li thread_function_t thread_function, 723*b095b053SXin Li const void* params, 724*b095b053SXin Li size_t params_size, 725*b095b053SXin Li void* task, 726*b095b053SXin Li void* context, 727*b095b053SXin Li size_t linear_range, 728*b095b053SXin Li uint32_t flags); 729*b095b053SXin Li 730*b095b053SXin Li /* Per-thread _fastpath routines, one for each parallelization shape named in the identifier (1d through 6d, with tile_1d, tile_2d and with_uarch variants). Every declaration from here to the end of the header has the thread_function_t signature. NOTE(review): the definitions, and the conditions under which a fast path is selected, are outside this header - confirm before relying on them. */ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_fastpath( 731*b095b053SXin Li struct pthreadpool* threadpool, 732*b095b053SXin Li struct thread_info* thread); 733*b095b053SXin Li 734*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_with_uarch_fastpath( 735*b095b053SXin Li struct pthreadpool* threadpool, 736*b095b053SXin Li struct thread_info* thread); 737*b095b053SXin Li 738*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_tile_1d_fastpath( 739*b095b053SXin Li struct pthreadpool* threadpool, 740*b095b053SXin Li struct thread_info* thread); 741*b095b053SXin Li 742*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_fastpath( 743*b095b053SXin Li struct pthreadpool* threadpool, 
744*b095b053SXin Li struct thread_info* thread); 745*b095b053SXin Li 746*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_1d_fastpath( 747*b095b053SXin Li struct pthreadpool* threadpool, 748*b095b053SXin Li struct thread_info* thread); 749*b095b053SXin Li 750*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_fastpath( 751*b095b053SXin Li struct pthreadpool* threadpool, 752*b095b053SXin Li struct thread_info* thread); 753*b095b053SXin Li 754*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_with_uarch_fastpath( 755*b095b053SXin Li struct pthreadpool* threadpool, 756*b095b053SXin Li struct thread_info* thread); 757*b095b053SXin Li 758*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_fastpath( 759*b095b053SXin Li struct pthreadpool* threadpool, 760*b095b053SXin Li struct thread_info* thread); 761*b095b053SXin Li 762*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_1d_fastpath( 763*b095b053SXin Li struct pthreadpool* threadpool, 764*b095b053SXin Li struct thread_info* thread); 765*b095b053SXin Li 766*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_fastpath( 767*b095b053SXin Li struct pthreadpool* threadpool, 768*b095b053SXin Li struct thread_info* thread); 769*b095b053SXin Li 770*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_with_uarch_fastpath( 771*b095b053SXin Li struct pthreadpool* threadpool, 772*b095b053SXin Li struct thread_info* thread); 773*b095b053SXin Li 774*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_fastpath( 775*b095b053SXin Li struct pthreadpool* threadpool, 776*b095b053SXin Li struct thread_info* thread); 777*b095b053SXin Li 778*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_1d_fastpath( 779*b095b053SXin Li struct pthreadpool* threadpool, 780*b095b053SXin Li 
struct thread_info* thread); 781*b095b053SXin Li 782*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_fastpath( 783*b095b053SXin Li struct pthreadpool* threadpool, 784*b095b053SXin Li struct thread_info* thread); 785*b095b053SXin Li 786*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_with_uarch_fastpath( 787*b095b053SXin Li struct pthreadpool* threadpool, 788*b095b053SXin Li struct thread_info* thread); 789*b095b053SXin Li 790*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_fastpath( 791*b095b053SXin Li struct pthreadpool* threadpool, 792*b095b053SXin Li struct thread_info* thread); 793*b095b053SXin Li 794*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_1d_fastpath( 795*b095b053SXin Li struct pthreadpool* threadpool, 796*b095b053SXin Li struct thread_info* thread); 797*b095b053SXin Li 798*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_2d_fastpath( 799*b095b053SXin Li struct pthreadpool* threadpool, 800*b095b053SXin Li struct thread_info* thread); 801*b095b053SXin Li 802*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_fastpath( 803*b095b053SXin Li struct pthreadpool* threadpool, 804*b095b053SXin Li struct thread_info* thread); 805*b095b053SXin Li 806*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_1d_fastpath( 807*b095b053SXin Li struct pthreadpool* threadpool, 808*b095b053SXin Li struct thread_info* thread); 809*b095b053SXin Li 810*b095b053SXin Li PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_2d_fastpath( 811*b095b053SXin Li struct pthreadpool* threadpool, 812*b095b053SXin Li struct thread_info* thread); 813