1*b095b053SXin Li #ifndef PTHREADPOOL_H_ 2*b095b053SXin Li #define PTHREADPOOL_H_ 3*b095b053SXin Li 4*b095b053SXin Li #include <stddef.h> 5*b095b053SXin Li #include <stdint.h> 6*b095b053SXin Li 7*b095b053SXin Li typedef struct pthreadpool* pthreadpool_t; 8*b095b053SXin Li 9*b095b053SXin Li typedef void (*pthreadpool_task_1d_t)(void*, size_t); 10*b095b053SXin Li typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t); 11*b095b053SXin Li typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t); 12*b095b053SXin Li typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t); 13*b095b053SXin Li typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t); 14*b095b053SXin Li typedef void (*pthreadpool_task_3d_t)(void*, size_t, size_t, size_t); 15*b095b053SXin Li typedef void (*pthreadpool_task_3d_tile_1d_t)(void*, size_t, size_t, size_t, size_t); 16*b095b053SXin Li typedef void (*pthreadpool_task_3d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t); 17*b095b053SXin Li typedef void (*pthreadpool_task_4d_t)(void*, size_t, size_t, size_t, size_t); 18*b095b053SXin Li typedef void (*pthreadpool_task_4d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t); 19*b095b053SXin Li typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); 20*b095b053SXin Li typedef void (*pthreadpool_task_5d_t)(void*, size_t, size_t, size_t, size_t, size_t); 21*b095b053SXin Li typedef void (*pthreadpool_task_5d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); 22*b095b053SXin Li typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); 23*b095b053SXin Li typedef void (*pthreadpool_task_6d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); 24*b095b053SXin Li typedef void (*pthreadpool_task_6d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); 25*b095b053SXin Li typedef void 
(*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t); 26*b095b053SXin Li 27*b095b053SXin Li typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t); 28*b095b053SXin Li typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t); 29*b095b053SXin Li typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t); 30*b095b053SXin Li typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t); 31*b095b053SXin Li 32*b095b053SXin Li 33*b095b053SXin Li /** 34*b095b053SXin Li * Disable support for denormalized numbers to the maximum extent possible for 35*b095b053SXin Li * the duration of the computation. 36*b095b053SXin Li * 37*b095b053SXin Li * Handling denormalized floating-point numbers is often implemented in 38*b095b053SXin Li * microcode, and incurs significant performance degradation. This hint 39*b095b053SXin Li * instructs the thread pool to disable support for denormalized numbers before 40*b095b053SXin Li * running the computation by manipulating architecture-specific control 41*b095b053SXin Li * registers, and restore the initial value of control registers after the 42*b095b053SXin Li * computation is complete. The thread pool temporarily disables denormalized 43*b095b053SXin Li * numbers on all threads involved in the computation (i.e. the caller threads, 44*b095b053SXin Li * and potentially worker threads). 45*b095b053SXin Li * 46*b095b053SXin Li * Disabling denormalized numbers may have a small negative effect on results' 47*b095b053SXin Li * accuracy. As various architectures differ in capabilities to control 48*b095b053SXin Li * processing of denormalized numbers, using this flag may also hurt results' 49*b095b053SXin Li * reproducibility across different instruction set architectures. 
50*b095b053SXin Li */ 51*b095b053SXin Li #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001 52*b095b053SXin Li 53*b095b053SXin Li /** 54*b095b053SXin Li * Yield worker threads to the system scheduler after the operation is finished. 55*b095b053SXin Li * 56*b095b053SXin Li * Force workers to use kernel wait (instead of active spin-wait by default) for 57*b095b053SXin Li * new commands after this command is processed. This flag affects only the 58*b095b053SXin Li * immediate next operation on this thread pool. To make the thread pool always 59*b095b053SXin Li * use kernel wait, pass this flag to all parallelization functions. 60*b095b053SXin Li */ 61*b095b053SXin Li #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002 62*b095b053SXin Li 63*b095b053SXin Li #ifdef __cplusplus 64*b095b053SXin Li extern "C" { 65*b095b053SXin Li #endif 66*b095b053SXin Li 67*b095b053SXin Li /** 68*b095b053SXin Li * Create a thread pool with the specified number of threads. 69*b095b053SXin Li * 70*b095b053SXin Li * @param threads_count the number of threads in the thread pool. 71*b095b053SXin Li * A value of 0 has special interpretation: it creates a thread pool with as 72*b095b053SXin Li * many threads as there are logical processors in the system. 73*b095b053SXin Li * 74*b095b053SXin Li * @returns A pointer to an opaque thread pool object if the call is 75*b095b053SXin Li * successful, or NULL pointer if the call failed. 76*b095b053SXin Li */ 77*b095b053SXin Li pthreadpool_t pthreadpool_create(size_t threads_count); 78*b095b053SXin Li 79*b095b053SXin Li /** 80*b095b053SXin Li * Query the number of threads in a thread pool. 81*b095b053SXin Li * 82*b095b053SXin Li * @param threadpool the thread pool to query. 83*b095b053SXin Li * 84*b095b053SXin Li * @returns The number of threads in the thread pool. 
85*b095b053SXin Li */ 86*b095b053SXin Li size_t pthreadpool_get_threads_count(pthreadpool_t threadpool); 87*b095b053SXin Li 88*b095b053SXin Li /** 89*b095b053SXin Li * Process items on a 1D grid. 90*b095b053SXin Li * 91*b095b053SXin Li * The function implements a parallel version of the following snippet: 92*b095b053SXin Li * 93*b095b053SXin Li * for (size_t i = 0; i < range; i++) 94*b095b053SXin Li * function(context, i); 95*b095b053SXin Li * 96*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 97*b095b053SXin Li * is ready for a new task. 98*b095b053SXin Li * 99*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 100*b095b053SXin Li * calls are serialized. 101*b095b053SXin Li * 102*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 103*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 104*b095b053SXin Li * @param function the function to call for each item. 105*b095b053SXin Li * @param context the first argument passed to the specified function. 106*b095b053SXin Li * @param range the number of items on the 1D grid to process. The 107*b095b053SXin Li * specified function will be called once for each item. 108*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 109*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 110*b095b053SXin Li */ 111*b095b053SXin Li void pthreadpool_parallelize_1d( 112*b095b053SXin Li pthreadpool_t threadpool, 113*b095b053SXin Li pthreadpool_task_1d_t function, 114*b095b053SXin Li void* context, 115*b095b053SXin Li size_t range, 116*b095b053SXin Li uint32_t flags); 117*b095b053SXin Li 118*b095b053SXin Li /** 119*b095b053SXin Li * Process items on a 1D grid using a microarchitecture-aware task function. 
120*b095b053SXin Li * 121*b095b053SXin Li * The function implements a parallel version of the following snippet: 122*b095b053SXin Li * 123*b095b053SXin Li * uint32_t uarch_index = cpuinfo_initialize() ? 124*b095b053SXin Li * cpuinfo_get_current_uarch_index() : default_uarch_index; 125*b095b053SXin Li * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; 126*b095b053SXin Li * for (size_t i = 0; i < range; i++) 127*b095b053SXin Li * function(context, uarch_index, i); 128*b095b053SXin Li * 129*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 130*b095b053SXin Li * is ready for a new task. 131*b095b053SXin Li * 132*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 133*b095b053SXin Li * calls are serialized. 134*b095b053SXin Li * 135*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If 136*b095b053SXin Li * threadpool is NULL, all items are processed serially on the calling 137*b095b053SXin Li * thread. 138*b095b053SXin Li * @param function the function to call for each item. 139*b095b053SXin Li * @param context the first argument passed to the specified 140*b095b053SXin Li * function. 141*b095b053SXin Li * @param default_uarch_index the microarchitecture index to use when 142*b095b053SXin Li * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, 143*b095b053SXin Li * or index returned by cpuinfo_get_current_uarch_index() exceeds the 144*b095b053SXin Li * max_uarch_index value. 145*b095b053SXin Li * @param max_uarch_index the maximum microarchitecture index expected by 146*b095b053SXin Li * the specified function. If the index returned by 147*b095b053SXin Li * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index 148*b095b053SXin Li * will be used instead. default_uarch_index can exceed max_uarch_index. 149*b095b053SXin Li * @param range the number of items on the 1D grid to process. 
150*b095b053SXin Li * The specified function will be called once for each item. 151*b095b053SXin Li * @param flags a bitwise combination of zero or more optional 152*b095b053SXin Li * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or 153*b095b053SXin Li * PTHREADPOOL_FLAG_YIELD_WORKERS) 154*b095b053SXin Li */ 155*b095b053SXin Li void pthreadpool_parallelize_1d_with_uarch( 156*b095b053SXin Li pthreadpool_t threadpool, 157*b095b053SXin Li pthreadpool_task_1d_with_id_t function, 158*b095b053SXin Li void* context, 159*b095b053SXin Li uint32_t default_uarch_index, 160*b095b053SXin Li uint32_t max_uarch_index, 161*b095b053SXin Li size_t range, 162*b095b053SXin Li uint32_t flags); 163*b095b053SXin Li 164*b095b053SXin Li /** 165*b095b053SXin Li * Process items on a 1D grid with specified maximum tile size. 166*b095b053SXin Li * 167*b095b053SXin Li * The function implements a parallel version of the following snippet: 168*b095b053SXin Li * 169*b095b053SXin Li * for (size_t i = 0; i < range; i += tile) 170*b095b053SXin Li * function(context, i, min(range - i, tile)); 171*b095b053SXin Li * 172*b095b053SXin Li * When the call returns, all items have been processed and the thread pool is 173*b095b053SXin Li * ready for a new task. 174*b095b053SXin Li * 175*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, 176*b095b053SXin Li * the calls are serialized. 177*b095b053SXin Li * 178*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 179*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 180*b095b053SXin Li * @param function the function to call for each tile. 181*b095b053SXin Li * @param context the first argument passed to the specified function. 182*b095b053SXin Li * @param range the number of items on the 1D grid to process. 183*b095b053SXin Li * @param tile the maximum number of items on the 1D grid to process in 184*b095b053SXin Li * one function call. 
185*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 186*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 187*b095b053SXin Li */ 188*b095b053SXin Li void pthreadpool_parallelize_1d_tile_1d( 189*b095b053SXin Li pthreadpool_t threadpool, 190*b095b053SXin Li pthreadpool_task_1d_tile_1d_t function, 191*b095b053SXin Li void* context, 192*b095b053SXin Li size_t range, 193*b095b053SXin Li size_t tile, 194*b095b053SXin Li uint32_t flags); 195*b095b053SXin Li 196*b095b053SXin Li /** 197*b095b053SXin Li * Process items on a 2D grid. 198*b095b053SXin Li * 199*b095b053SXin Li * The function implements a parallel version of the following snippet: 200*b095b053SXin Li * 201*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 202*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 203*b095b053SXin Li * function(context, i, j); 204*b095b053SXin Li * 205*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 206*b095b053SXin Li * is ready for a new task. 207*b095b053SXin Li * 208*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 209*b095b053SXin Li * calls are serialized. 210*b095b053SXin Li * 211*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 212*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 213*b095b053SXin Li * @param function the function to call for each item. 214*b095b053SXin Li * @param context the first argument passed to the specified function. 215*b095b053SXin Li * @param range_i the number of items to process along the first dimension 216*b095b053SXin Li * of the 2D grid. 217*b095b053SXin Li * @param range_j the number of items to process along the second dimension 218*b095b053SXin Li * of the 2D grid. 
219*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 220*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 221*b095b053SXin Li */ 222*b095b053SXin Li void pthreadpool_parallelize_2d( 223*b095b053SXin Li pthreadpool_t threadpool, 224*b095b053SXin Li pthreadpool_task_2d_t function, 225*b095b053SXin Li void* context, 226*b095b053SXin Li size_t range_i, 227*b095b053SXin Li size_t range_j, 228*b095b053SXin Li uint32_t flags); 229*b095b053SXin Li 230*b095b053SXin Li /** 231*b095b053SXin Li * Process items on a 2D grid with the specified maximum tile size along the 232*b095b053SXin Li * last grid dimension. 233*b095b053SXin Li * 234*b095b053SXin Li * The function implements a parallel version of the following snippet: 235*b095b053SXin Li * 236*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 237*b095b053SXin Li * for (size_t j = 0; j < range_j; j += tile_j) 238*b095b053SXin Li * function(context, i, j, min(range_j - j, tile_j)); 239*b095b053SXin Li * 240*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 241*b095b053SXin Li * is ready for a new task. 242*b095b053SXin Li * 243*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 244*b095b053SXin Li * calls are serialized. 245*b095b053SXin Li * 246*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 247*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 248*b095b053SXin Li * @param function the function to call for each tile. 249*b095b053SXin Li * @param context the first argument passed to the specified function. 250*b095b053SXin Li * @param range_i the number of items to process along the first dimension 251*b095b053SXin Li * of the 2D grid. 252*b095b053SXin Li * @param range_j the number of items to process along the second dimension 253*b095b053SXin Li * of the 2D grid. 
254*b095b053SXin Li * @param tile_j the maximum number of items along the second dimension of 255*b095b053SXin Li * the 2D grid to process in one function call. 256*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 257*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 258*b095b053SXin Li */ 259*b095b053SXin Li void pthreadpool_parallelize_2d_tile_1d( 260*b095b053SXin Li pthreadpool_t threadpool, 261*b095b053SXin Li pthreadpool_task_2d_tile_1d_t function, 262*b095b053SXin Li void* context, 263*b095b053SXin Li size_t range_i, 264*b095b053SXin Li size_t range_j, 265*b095b053SXin Li size_t tile_j, 266*b095b053SXin Li uint32_t flags); 267*b095b053SXin Li 268*b095b053SXin Li /** 269*b095b053SXin Li * Process items on a 2D grid with the specified maximum tile size along each 270*b095b053SXin Li * grid dimension. 271*b095b053SXin Li * 272*b095b053SXin Li * The function implements a parallel version of the following snippet: 273*b095b053SXin Li * 274*b095b053SXin Li * for (size_t i = 0; i < range_i; i += tile_i) 275*b095b053SXin Li * for (size_t j = 0; j < range_j; j += tile_j) 276*b095b053SXin Li * function(context, i, j, 277*b095b053SXin Li * min(range_i - i, tile_i), min(range_j - j, tile_j)); 278*b095b053SXin Li * 279*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 280*b095b053SXin Li * is ready for a new task. 281*b095b053SXin Li * 282*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 283*b095b053SXin Li * calls are serialized. 284*b095b053SXin Li * 285*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 286*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 287*b095b053SXin Li * @param function the function to call for each tile. 288*b095b053SXin Li * @param context the first argument passed to the specified function. 
289*b095b053SXin Li * @param range_i the number of items to process along the first dimension 290*b095b053SXin Li * of the 2D grid. 291*b095b053SXin Li * @param range_j the number of items to process along the second dimension 292*b095b053SXin Li * of the 2D grid. 293*b095b053SXin Li * @param tile_i the maximum number of items along the first dimension of 294*b095b053SXin Li * the 2D grid to process in one function call. 295*b095b053SXin Li * @param tile_j the maximum number of items along the second dimension of 296*b095b053SXin Li * the 2D grid to process in one function call. 297*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 298*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 299*b095b053SXin Li */ 300*b095b053SXin Li void pthreadpool_parallelize_2d_tile_2d( 301*b095b053SXin Li pthreadpool_t threadpool, 302*b095b053SXin Li pthreadpool_task_2d_tile_2d_t function, 303*b095b053SXin Li void* context, 304*b095b053SXin Li size_t range_i, 305*b095b053SXin Li size_t range_j, 306*b095b053SXin Li size_t tile_i, 307*b095b053SXin Li size_t tile_j, 308*b095b053SXin Li uint32_t flags); 309*b095b053SXin Li 310*b095b053SXin Li /** 311*b095b053SXin Li * Process items on a 2D grid with the specified maximum tile size along each 312*b095b053SXin Li * grid dimension using a microarchitecture-aware task function. 313*b095b053SXin Li * 314*b095b053SXin Li * The function implements a parallel version of the following snippet: 315*b095b053SXin Li * 316*b095b053SXin Li * uint32_t uarch_index = cpuinfo_initialize() ? 
317*b095b053SXin Li * cpuinfo_get_current_uarch_index() : default_uarch_index; 318*b095b053SXin Li * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; 319*b095b053SXin Li * for (size_t i = 0; i < range_i; i += tile_i) 320*b095b053SXin Li * for (size_t j = 0; j < range_j; j += tile_j) 321*b095b053SXin Li * function(context, uarch_index, i, j, 322*b095b053SXin Li * min(range_i - i, tile_i), min(range_j - j, tile_j)); 323*b095b053SXin Li * 324*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 325*b095b053SXin Li * is ready for a new task. 326*b095b053SXin Li * 327*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 328*b095b053SXin Li * calls are serialized. 329*b095b053SXin Li * 330*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If 331*b095b053SXin Li * threadpool is NULL, all items are processed serially on the calling 332*b095b053SXin Li * thread. 333*b095b053SXin Li * @param function the function to call for each tile. 334*b095b053SXin Li * @param context the first argument passed to the specified 335*b095b053SXin Li * function. 336*b095b053SXin Li * @param default_uarch_index the microarchitecture index to use when 337*b095b053SXin Li * pthreadpool is configured without cpuinfo, 338*b095b053SXin Li * cpuinfo initialization failed, or index returned 339*b095b053SXin Li * by cpuinfo_get_current_uarch_index() exceeds 340*b095b053SXin Li * the max_uarch_index value. 341*b095b053SXin Li * @param max_uarch_index the maximum microarchitecture index expected 342*b095b053SXin Li * by the specified function. If the index returned 343*b095b053SXin Li * by cpuinfo_get_current_uarch_index() exceeds this 344*b095b053SXin Li * value, default_uarch_index will be used instead. 345*b095b053SXin Li * default_uarch_index can exceed max_uarch_index. 
346*b095b053SXin Li * @param range_i the number of items to process along the first 347*b095b053SXin Li * dimension of the 2D grid. 348*b095b053SXin Li * @param range_j the number of items to process along the second 349*b095b053SXin Li * dimension of the 2D grid. 350*b095b053SXin Li * @param tile_i the maximum number of items along the first 351*b095b053SXin Li * dimension of the 2D grid to process in one function call. 352*b095b053SXin Li * @param tile_j the maximum number of items along the second 353*b095b053SXin Li * dimension of the 2D grid to process in one function call. 354*b095b053SXin Li * @param flags a bitwise combination of zero or more optional 355*b095b053SXin Li * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or 356*b095b053SXin Li * PTHREADPOOL_FLAG_YIELD_WORKERS) 357*b095b053SXin Li */ 358*b095b053SXin Li void pthreadpool_parallelize_2d_tile_2d_with_uarch( 359*b095b053SXin Li pthreadpool_t threadpool, 360*b095b053SXin Li pthreadpool_task_2d_tile_2d_with_id_t function, 361*b095b053SXin Li void* context, 362*b095b053SXin Li uint32_t default_uarch_index, 363*b095b053SXin Li uint32_t max_uarch_index, 364*b095b053SXin Li size_t range_i, 365*b095b053SXin Li size_t range_j, 366*b095b053SXin Li size_t tile_i, 367*b095b053SXin Li size_t tile_j, 368*b095b053SXin Li uint32_t flags); 369*b095b053SXin Li 370*b095b053SXin Li /** 371*b095b053SXin Li * Process items on a 3D grid. 372*b095b053SXin Li * 373*b095b053SXin Li * The function implements a parallel version of the following snippet: 374*b095b053SXin Li * 375*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 376*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 377*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 378*b095b053SXin Li * function(context, i, j, k); 379*b095b053SXin Li * 380*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 381*b095b053SXin Li * is ready for a new task. 
382*b095b053SXin Li * 383*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 384*b095b053SXin Li * calls are serialized. 385*b095b053SXin Li * 386*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 387*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 388*b095b053SXin Li * @param function the function to call for each item. 389*b095b053SXin Li * @param context the first argument passed to the specified function. 390*b095b053SXin Li * @param range_i the number of items to process along the first dimension 391*b095b053SXin Li * of the 3D grid. 392*b095b053SXin Li * @param range_j the number of items to process along the second dimension 393*b095b053SXin Li * of the 3D grid. 394*b095b053SXin Li * @param range_k the number of items to process along the third dimension 395*b095b053SXin Li * of the 3D grid. 396*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 397*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 398*b095b053SXin Li */ 399*b095b053SXin Li void pthreadpool_parallelize_3d( 400*b095b053SXin Li pthreadpool_t threadpool, 401*b095b053SXin Li pthreadpool_task_3d_t function, 402*b095b053SXin Li void* context, 403*b095b053SXin Li size_t range_i, 404*b095b053SXin Li size_t range_j, 405*b095b053SXin Li size_t range_k, 406*b095b053SXin Li uint32_t flags); 407*b095b053SXin Li 408*b095b053SXin Li /** 409*b095b053SXin Li * Process items on a 3D grid with the specified maximum tile size along the 410*b095b053SXin Li * last grid dimension. 
411*b095b053SXin Li * 412*b095b053SXin Li * The function implements a parallel version of the following snippet: 413*b095b053SXin Li * 414*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 415*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 416*b095b053SXin Li * for (size_t k = 0; k < range_k; k += tile_k) 417*b095b053SXin Li * function(context, i, j, k, min(range_k - k, tile_k)); 418*b095b053SXin Li * 419*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 420*b095b053SXin Li * is ready for a new task. 421*b095b053SXin Li * 422*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 423*b095b053SXin Li * calls are serialized. 424*b095b053SXin Li * 425*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 426*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 427*b095b053SXin Li * @param function the function to call for each tile. 428*b095b053SXin Li * @param context the first argument passed to the specified function. 429*b095b053SXin Li * @param range_i the number of items to process along the first dimension 430*b095b053SXin Li * of the 3D grid. 431*b095b053SXin Li * @param range_j the number of items to process along the second dimension 432*b095b053SXin Li * of the 3D grid. 433*b095b053SXin Li * @param range_k the number of items to process along the third dimension 434*b095b053SXin Li * of the 3D grid. 435*b095b053SXin Li * @param tile_k the maximum number of items along the third dimension of 436*b095b053SXin Li * the 3D grid to process in one function call. 
437*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 438*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 439*b095b053SXin Li */ 440*b095b053SXin Li void pthreadpool_parallelize_3d_tile_1d( 441*b095b053SXin Li pthreadpool_t threadpool, 442*b095b053SXin Li pthreadpool_task_3d_tile_1d_t function, 443*b095b053SXin Li void* context, 444*b095b053SXin Li size_t range_i, 445*b095b053SXin Li size_t range_j, 446*b095b053SXin Li size_t range_k, 447*b095b053SXin Li size_t tile_k, 448*b095b053SXin Li uint32_t flags); 449*b095b053SXin Li 450*b095b053SXin Li /** 451*b095b053SXin Li * Process items on a 3D grid with the specified maximum tile size along the 452*b095b053SXin Li * last two grid dimensions. 453*b095b053SXin Li * 454*b095b053SXin Li * The function implements a parallel version of the following snippet: 455*b095b053SXin Li * 456*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 457*b095b053SXin Li * for (size_t j = 0; j < range_j; j += tile_j) 458*b095b053SXin Li * for (size_t k = 0; k < range_k; k += tile_k) 459*b095b053SXin Li * function(context, i, j, k, 460*b095b053SXin Li * min(range_j - j, tile_j), min(range_k - k, tile_k)); 461*b095b053SXin Li * 462*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 463*b095b053SXin Li * is ready for a new task. 464*b095b053SXin Li * 465*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 466*b095b053SXin Li * calls are serialized. 467*b095b053SXin Li * 468*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 469*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 470*b095b053SXin Li * @param function the function to call for each tile. 471*b095b053SXin Li * @param context the first argument passed to the specified function. 
472*b095b053SXin Li * @param range_i the number of items to process along the first dimension 473*b095b053SXin Li * of the 3D grid. 474*b095b053SXin Li * @param range_j the number of items to process along the second dimension 475*b095b053SXin Li * of the 3D grid. 476*b095b053SXin Li * @param range_k the number of items to process along the third dimension 477*b095b053SXin Li * of the 3D grid. 478*b095b053SXin Li * @param tile_j the maximum number of items along the second dimension of 479*b095b053SXin Li * the 3D grid to process in one function call. 480*b095b053SXin Li * @param tile_k the maximum number of items along the third dimension of 481*b095b053SXin Li * the 3D grid to process in one function call. 482*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 483*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 484*b095b053SXin Li */ 485*b095b053SXin Li void pthreadpool_parallelize_3d_tile_2d( 486*b095b053SXin Li pthreadpool_t threadpool, 487*b095b053SXin Li pthreadpool_task_3d_tile_2d_t function, 488*b095b053SXin Li void* context, 489*b095b053SXin Li size_t range_i, 490*b095b053SXin Li size_t range_j, 491*b095b053SXin Li size_t range_k, 492*b095b053SXin Li size_t tile_j, 493*b095b053SXin Li size_t tile_k, 494*b095b053SXin Li uint32_t flags); 495*b095b053SXin Li 496*b095b053SXin Li /** 497*b095b053SXin Li * Process items on a 3D grid with the specified maximum tile size along the 498*b095b053SXin Li * last two grid dimensions using a microarchitecture-aware task function. 499*b095b053SXin Li * 500*b095b053SXin Li * The function implements a parallel version of the following snippet: 501*b095b053SXin Li * 502*b095b053SXin Li * uint32_t uarch_index = cpuinfo_initialize() ? 
503*b095b053SXin Li * cpuinfo_get_current_uarch_index() : default_uarch_index; 504*b095b053SXin Li * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; 505*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 506*b095b053SXin Li * for (size_t j = 0; j < range_j; j += tile_j) 507*b095b053SXin Li * for (size_t k = 0; k < range_k; k += tile_k) 508*b095b053SXin Li * function(context, uarch_index, i, j, k, 509*b095b053SXin Li * min(range_j - j, tile_j), min(range_k - k, tile_k)); 510*b095b053SXin Li * 511*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 512*b095b053SXin Li * is ready for a new task. 513*b095b053SXin Li * 514*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 515*b095b053SXin Li * calls are serialized. 516*b095b053SXin Li * 517*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If 518*b095b053SXin Li * threadpool is NULL, all items are processed serially on the calling 519*b095b053SXin Li * thread. 520*b095b053SXin Li * @param function the function to call for each tile. 521*b095b053SXin Li * @param context the first argument passed to the specified 522*b095b053SXin Li * function. 523*b095b053SXin Li * @param default_uarch_index the microarchitecture index to use when 524*b095b053SXin Li * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, 525*b095b053SXin Li * or index returned by cpuinfo_get_current_uarch_index() exceeds the 526*b095b053SXin Li * max_uarch_index value. 527*b095b053SXin Li * @param max_uarch_index the maximum microarchitecture index expected by 528*b095b053SXin Li * the specified function. If the index returned by 529*b095b053SXin Li * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index 530*b095b053SXin Li * will be used instead. default_uarch_index can exceed max_uarch_index. 
531*b095b053SXin Li * @param range_i the number of items to process along the first 532*b095b053SXin Li * dimension of the 3D grid. 533*b095b053SXin Li * @param range_j the number of items to process along the second 534*b095b053SXin Li * dimension of the 3D grid. 535*b095b053SXin Li * @param range_k the number of items to process along the third 536*b095b053SXin Li * dimension of the 3D grid. 537*b095b053SXin Li * @param tile_j the maximum number of items along the second 538*b095b053SXin Li * dimension of the 3D grid to process in one function call. 539*b095b053SXin Li * @param tile_k the maximum number of items along the third 540*b095b053SXin Li * dimension of the 3D grid to process in one function call. 541*b095b053SXin Li * @param flags a bitwise combination of zero or more optional 542*b095b053SXin Li * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or 543*b095b053SXin Li * PTHREADPOOL_FLAG_YIELD_WORKERS) 544*b095b053SXin Li */ 545*b095b053SXin Li void pthreadpool_parallelize_3d_tile_2d_with_uarch( 546*b095b053SXin Li pthreadpool_t threadpool, 547*b095b053SXin Li pthreadpool_task_3d_tile_2d_with_id_t function, 548*b095b053SXin Li void* context, 549*b095b053SXin Li uint32_t default_uarch_index, 550*b095b053SXin Li uint32_t max_uarch_index, 551*b095b053SXin Li size_t range_i, 552*b095b053SXin Li size_t range_j, 553*b095b053SXin Li size_t range_k, 554*b095b053SXin Li size_t tile_j, 555*b095b053SXin Li size_t tile_k, 556*b095b053SXin Li uint32_t flags); 557*b095b053SXin Li 558*b095b053SXin Li /** 559*b095b053SXin Li * Process items on a 4D grid. 
560*b095b053SXin Li * 561*b095b053SXin Li * The function implements a parallel version of the following snippet: 562*b095b053SXin Li * 563*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 564*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 565*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 566*b095b053SXin Li * for (size_t l = 0; l < range_l; l++) 567*b095b053SXin Li * function(context, i, j, k, l); 568*b095b053SXin Li * 569*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 570*b095b053SXin Li * is ready for a new task. 571*b095b053SXin Li * 572*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 573*b095b053SXin Li * calls are serialized. 574*b095b053SXin Li * 575*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 576*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 577*b095b053SXin Li * @param function the function to call for each item. 578*b095b053SXin Li * @param context the first argument passed to the specified function. 579*b095b053SXin Li * @param range_i the number of items to process along the first dimension 580*b095b053SXin Li * of the 4D grid. 581*b095b053SXin Li * @param range_j the number of items to process along the second dimension 582*b095b053SXin Li * of the 4D grid. 583*b095b053SXin Li * @param range_k the number of items to process along the third dimension 584*b095b053SXin Li * of the 4D grid. 585*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 586*b095b053SXin Li * of the 4D grid. 
587*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 588*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 589*b095b053SXin Li */ 590*b095b053SXin Li void pthreadpool_parallelize_4d( 591*b095b053SXin Li pthreadpool_t threadpool, 592*b095b053SXin Li pthreadpool_task_4d_t function, 593*b095b053SXin Li void* context, 594*b095b053SXin Li size_t range_i, 595*b095b053SXin Li size_t range_j, 596*b095b053SXin Li size_t range_k, 597*b095b053SXin Li size_t range_l, 598*b095b053SXin Li uint32_t flags); 599*b095b053SXin Li 600*b095b053SXin Li /** 601*b095b053SXin Li * Process items on a 4D grid with the specified maximum tile size along the 602*b095b053SXin Li * last grid dimension. 603*b095b053SXin Li * 604*b095b053SXin Li * The function implements a parallel version of the following snippet: 605*b095b053SXin Li * 606*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 607*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 608*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 609*b095b053SXin Li * for (size_t l = 0; l < range_l; l += tile_l) 610*b095b053SXin Li * function(context, i, j, k, l, min(range_l - l, tile_l)); 611*b095b053SXin Li * 612*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 613*b095b053SXin Li * is ready for a new task. 614*b095b053SXin Li * 615*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 616*b095b053SXin Li * calls are serialized. 617*b095b053SXin Li * 618*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 619*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 620*b095b053SXin Li * @param function the function to call for each tile. 621*b095b053SXin Li * @param context the first argument passed to the specified function. 
622*b095b053SXin Li * @param range_i the number of items to process along the first dimension 623*b095b053SXin Li * of the 4D grid. 624*b095b053SXin Li * @param range_j the number of items to process along the second dimension 625*b095b053SXin Li * of the 4D grid. 626*b095b053SXin Li * @param range_k the number of items to process along the third dimension 627*b095b053SXin Li * of the 4D grid. 628*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 629*b095b053SXin Li * of the 4D grid. 630*b095b053SXin Li * @param tile_l the maximum number of items along the fourth dimension of 631*b095b053SXin Li * the 4D grid to process in one function call. 632*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 633*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 634*b095b053SXin Li */ 635*b095b053SXin Li void pthreadpool_parallelize_4d_tile_1d( 636*b095b053SXin Li pthreadpool_t threadpool, 637*b095b053SXin Li pthreadpool_task_4d_tile_1d_t function, 638*b095b053SXin Li void* context, 639*b095b053SXin Li size_t range_i, 640*b095b053SXin Li size_t range_j, 641*b095b053SXin Li size_t range_k, 642*b095b053SXin Li size_t range_l, 643*b095b053SXin Li size_t tile_l, 644*b095b053SXin Li uint32_t flags); 645*b095b053SXin Li 646*b095b053SXin Li /** 647*b095b053SXin Li * Process items on a 4D grid with the specified maximum tile size along the 648*b095b053SXin Li * last two grid dimensions. 
649*b095b053SXin Li * 650*b095b053SXin Li * The function implements a parallel version of the following snippet: 651*b095b053SXin Li * 652*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 653*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 654*b095b053SXin Li * for (size_t k = 0; k < range_k; k += tile_k) 655*b095b053SXin Li * for (size_t l = 0; l < range_l; l += tile_l) 656*b095b053SXin Li * function(context, i, j, k, l, 657*b095b053SXin Li * min(range_k - k, tile_k), min(range_l - l, tile_l)); 658*b095b053SXin Li * 659*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 660*b095b053SXin Li * is ready for a new task. 661*b095b053SXin Li * 662*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 663*b095b053SXin Li * calls are serialized. 664*b095b053SXin Li * 665*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 666*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 667*b095b053SXin Li * @param function the function to call for each tile. 668*b095b053SXin Li * @param context the first argument passed to the specified function. 669*b095b053SXin Li * @param range_i the number of items to process along the first dimension 670*b095b053SXin Li * of the 4D grid. 671*b095b053SXin Li * @param range_j the number of items to process along the second dimension 672*b095b053SXin Li * of the 4D grid. 673*b095b053SXin Li * @param range_k the number of items to process along the third dimension 674*b095b053SXin Li * of the 4D grid. 675*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 676*b095b053SXin Li * of the 4D grid. 677*b095b053SXin Li * @param tile_k the maximum number of items along the third dimension of 678*b095b053SXin Li * the 4D grid to process in one function call. 
679*b095b053SXin Li * @param tile_l the maximum number of items along the fourth dimension of 680*b095b053SXin Li * the 4D grid to process in one function call. 681*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 682*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 683*b095b053SXin Li */ 684*b095b053SXin Li void pthreadpool_parallelize_4d_tile_2d( 685*b095b053SXin Li pthreadpool_t threadpool, 686*b095b053SXin Li pthreadpool_task_4d_tile_2d_t function, 687*b095b053SXin Li void* context, 688*b095b053SXin Li size_t range_i, 689*b095b053SXin Li size_t range_j, 690*b095b053SXin Li size_t range_k, 691*b095b053SXin Li size_t range_l, 692*b095b053SXin Li size_t tile_k, 693*b095b053SXin Li size_t tile_l, 694*b095b053SXin Li uint32_t flags); 695*b095b053SXin Li 696*b095b053SXin Li /** 697*b095b053SXin Li * Process items on a 4D grid with the specified maximum tile size along the 698*b095b053SXin Li * last two grid dimensions using a microarchitecture-aware task function. 699*b095b053SXin Li * 700*b095b053SXin Li * The function implements a parallel version of the following snippet: 701*b095b053SXin Li * 702*b095b053SXin Li * uint32_t uarch_index = cpuinfo_initialize() ? 703*b095b053SXin Li * cpuinfo_get_current_uarch_index() : default_uarch_index; 704*b095b053SXin Li * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; 705*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 706*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 707*b095b053SXin Li * for (size_t k = 0; k < range_k; k += tile_k) 708*b095b053SXin Li * for (size_t l = 0; l < range_l; l += tile_l) 709*b095b053SXin Li * function(context, uarch_index, i, j, k, l, 710*b095b053SXin Li * min(range_k - k, tile_k), min(range_l - l, tile_l)); 711*b095b053SXin Li * 712*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 713*b095b053SXin Li * is ready for a new task. 
714*b095b053SXin Li * 715*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 716*b095b053SXin Li * calls are serialized. 717*b095b053SXin Li * 718*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If 719*b095b053SXin Li * threadpool is NULL, all items are processed serially on the calling 720*b095b053SXin Li * thread. 721*b095b053SXin Li * @param function the function to call for each tile. 722*b095b053SXin Li * @param context the first argument passed to the specified 723*b095b053SXin Li * function. 724*b095b053SXin Li * @param default_uarch_index the microarchitecture index to use when 725*b095b053SXin Li * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, 726*b095b053SXin Li * or index returned by cpuinfo_get_current_uarch_index() exceeds the 727*b095b053SXin Li * max_uarch_index value. 728*b095b053SXin Li * @param max_uarch_index the maximum microarchitecture index expected by 729*b095b053SXin Li * the specified function. If the index returned by 730*b095b053SXin Li * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index 731*b095b053SXin Li * will be used instead. default_uarch_index can exceed max_uarch_index. 732*b095b053SXin Li * @param range_i the number of items to process along the first 733*b095b053SXin Li * dimension of the 4D grid. 734*b095b053SXin Li * @param range_j the number of items to process along the second 735*b095b053SXin Li * dimension of the 4D grid. 736*b095b053SXin Li * @param range_k the number of items to process along the third 737*b095b053SXin Li * dimension of the 4D grid. 738*b095b053SXin Li * @param range_l the number of items to process along the fourth 739*b095b053SXin Li * dimension of the 4D grid. 740*b095b053SXin Li * @param tile_k the maximum number of items along the third 741*b095b053SXin Li * dimension of the 4D grid to process in one function call. 
742*b095b053SXin Li * @param tile_l the maximum number of items along the fourth 743*b095b053SXin Li * dimension of the 4D grid to process in one function call. 744*b095b053SXin Li * @param flags a bitwise combination of zero or more optional 745*b095b053SXin Li * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or 746*b095b053SXin Li * PTHREADPOOL_FLAG_YIELD_WORKERS) 747*b095b053SXin Li */ 748*b095b053SXin Li void pthreadpool_parallelize_4d_tile_2d_with_uarch( 749*b095b053SXin Li pthreadpool_t threadpool, 750*b095b053SXin Li pthreadpool_task_4d_tile_2d_with_id_t function, 751*b095b053SXin Li void* context, 752*b095b053SXin Li uint32_t default_uarch_index, 753*b095b053SXin Li uint32_t max_uarch_index, 754*b095b053SXin Li size_t range_i, 755*b095b053SXin Li size_t range_j, 756*b095b053SXin Li size_t range_k, 757*b095b053SXin Li size_t range_l, 758*b095b053SXin Li size_t tile_k, 759*b095b053SXin Li size_t tile_l, 760*b095b053SXin Li uint32_t flags); 761*b095b053SXin Li 762*b095b053SXin Li /** 763*b095b053SXin Li * Process items on a 5D grid. 764*b095b053SXin Li * 765*b095b053SXin Li * The function implements a parallel version of the following snippet: 766*b095b053SXin Li * 767*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 768*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 769*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 770*b095b053SXin Li * for (size_t l = 0; l < range_l; l++) 771*b095b053SXin Li * for (size_t m = 0; m < range_m; m++) 772*b095b053SXin Li * function(context, i, j, k, l, m); 773*b095b053SXin Li * 774*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 775*b095b053SXin Li * is ready for a new task. 776*b095b053SXin Li * 777*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 778*b095b053SXin Li * calls are serialized. 779*b095b053SXin Li * 780*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. 
If threadpool 781*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 782*b095b053SXin Li * @param function the function to call for each item. 783*b095b053SXin Li * @param context the first argument passed to the specified function. 784*b095b053SXin Li * @param range_i the number of items to process along the first dimension 785*b095b053SXin Li * of the 5D grid. 786*b095b053SXin Li * @param range_j the number of items to process along the second dimension 787*b095b053SXin Li * of the 5D grid. 788*b095b053SXin Li * @param range_k the number of items to process along the third dimension 789*b095b053SXin Li * of the 5D grid. 790*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 791*b095b053SXin Li * of the 5D grid. 792*b095b053SXin Li * @param range_m the number of items to process along the fifth dimension 793*b095b053SXin Li * of the 5D grid. 794*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 795*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 796*b095b053SXin Li */ 797*b095b053SXin Li void pthreadpool_parallelize_5d( 798*b095b053SXin Li pthreadpool_t threadpool, 799*b095b053SXin Li pthreadpool_task_5d_t function, 800*b095b053SXin Li void* context, 801*b095b053SXin Li size_t range_i, 802*b095b053SXin Li size_t range_j, 803*b095b053SXin Li size_t range_k, 804*b095b053SXin Li size_t range_l, 805*b095b053SXin Li size_t range_m, 806*b095b053SXin Li uint32_t flags); 807*b095b053SXin Li 808*b095b053SXin Li /** 809*b095b053SXin Li * Process items on a 5D grid with the specified maximum tile size along the 810*b095b053SXin Li * last grid dimension. 
811*b095b053SXin Li * 812*b095b053SXin Li * The function implements a parallel version of the following snippet: 813*b095b053SXin Li * 814*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 815*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 816*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 817*b095b053SXin Li * for (size_t l = 0; l < range_l; l++) 818*b095b053SXin Li * for (size_t m = 0; m < range_m; m += tile_m) 819*b095b053SXin Li * function(context, i, j, k, l, m, min(range_m - m, tile_m)); 820*b095b053SXin Li * 821*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 822*b095b053SXin Li * is ready for a new task. 823*b095b053SXin Li * 824*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 825*b095b053SXin Li * calls are serialized. 826*b095b053SXin Li * 827*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 828*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 829*b095b053SXin Li * @param function the function to call for each tile. 830*b095b053SXin Li * @param context the first argument passed to the specified function. 831*b095b053SXin Li * @param range_i the number of items to process along the first dimension 832*b095b053SXin Li * of the 5D grid. 833*b095b053SXin Li * @param range_j the number of items to process along the second dimension 834*b095b053SXin Li * of the 5D grid. 835*b095b053SXin Li * @param range_k the number of items to process along the third dimension 836*b095b053SXin Li * of the 5D grid. 837*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 838*b095b053SXin Li * of the 5D grid. 839*b095b053SXin Li * @param range_m the number of items to process along the fifth dimension 840*b095b053SXin Li * of the 5D grid. 
841*b095b053SXin Li * @param tile_m the maximum number of items along the fifth dimension of 842*b095b053SXin Li * the 5D grid to process in one function call. 843*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 844*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 845*b095b053SXin Li */ 846*b095b053SXin Li void pthreadpool_parallelize_5d_tile_1d( 847*b095b053SXin Li pthreadpool_t threadpool, 848*b095b053SXin Li pthreadpool_task_5d_tile_1d_t function, 849*b095b053SXin Li void* context, 850*b095b053SXin Li size_t range_i, 851*b095b053SXin Li size_t range_j, 852*b095b053SXin Li size_t range_k, 853*b095b053SXin Li size_t range_l, 854*b095b053SXin Li size_t range_m, 855*b095b053SXin Li size_t tile_m, 856*b095b053SXin Li uint32_t flags); 857*b095b053SXin Li 858*b095b053SXin Li /** 859*b095b053SXin Li * Process items on a 5D grid with the specified maximum tile size along the 860*b095b053SXin Li * last two grid dimensions. 861*b095b053SXin Li * 862*b095b053SXin Li * The function implements a parallel version of the following snippet: 863*b095b053SXin Li * 864*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 865*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 866*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 867*b095b053SXin Li * for (size_t l = 0; l < range_l; l += tile_l) 868*b095b053SXin Li * for (size_t m = 0; m < range_m; m += tile_m) 869*b095b053SXin Li * function(context, i, j, k, l, m, 870*b095b053SXin Li * min(range_l - l, tile_l), min(range_m - m, tile_m)); 871*b095b053SXin Li * 872*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 873*b095b053SXin Li * is ready for a new task. 874*b095b053SXin Li * 875*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 876*b095b053SXin Li * calls are serialized. 
877*b095b053SXin Li * 878*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 879*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 880*b095b053SXin Li * @param function the function to call for each tile. 881*b095b053SXin Li * @param context the first argument passed to the specified function. 882*b095b053SXin Li * @param range_i the number of items to process along the first dimension 883*b095b053SXin Li * of the 5D grid. 884*b095b053SXin Li * @param range_j the number of items to process along the second dimension 885*b095b053SXin Li * of the 5D grid. 886*b095b053SXin Li * @param range_k the number of items to process along the third dimension 887*b095b053SXin Li * of the 5D grid. 888*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 889*b095b053SXin Li * of the 5D grid. 890*b095b053SXin Li * @param range_m the number of items to process along the fifth dimension 891*b095b053SXin Li * of the 5D grid. 892*b095b053SXin Li * @param tile_l the maximum number of items along the fourth dimension of 893*b095b053SXin Li * the 5D grid to process in one function call. 894*b095b053SXin Li * @param tile_m the maximum number of items along the fifth dimension of 895*b095b053SXin Li * the 5D grid to process in one function call. 
896*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 897*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 898*b095b053SXin Li */ 899*b095b053SXin Li void pthreadpool_parallelize_5d_tile_2d( 900*b095b053SXin Li pthreadpool_t threadpool, 901*b095b053SXin Li pthreadpool_task_5d_tile_2d_t function, 902*b095b053SXin Li void* context, 903*b095b053SXin Li size_t range_i, 904*b095b053SXin Li size_t range_j, 905*b095b053SXin Li size_t range_k, 906*b095b053SXin Li size_t range_l, 907*b095b053SXin Li size_t range_m, 908*b095b053SXin Li size_t tile_l, 909*b095b053SXin Li size_t tile_m, 910*b095b053SXin Li uint32_t flags); 911*b095b053SXin Li 912*b095b053SXin Li /** 913*b095b053SXin Li * Process items on a 6D grid. 914*b095b053SXin Li * 915*b095b053SXin Li * The function implements a parallel version of the following snippet: 916*b095b053SXin Li * 917*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 918*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 919*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 920*b095b053SXin Li * for (size_t l = 0; l < range_l; l++) 921*b095b053SXin Li * for (size_t m = 0; m < range_m; m++) 922*b095b053SXin Li * for (size_t n = 0; n < range_n; n++) 923*b095b053SXin Li * function(context, i, j, k, l, m, n); 924*b095b053SXin Li * 925*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 926*b095b053SXin Li * is ready for a new task. 927*b095b053SXin Li * 928*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 929*b095b053SXin Li * calls are serialized. 930*b095b053SXin Li * 931*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 932*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 933*b095b053SXin Li * @param function the function to call for each item. 
934*b095b053SXin Li * @param context the first argument passed to the specified function. 935*b095b053SXin Li * @param range_i the number of items to process along the first dimension 936*b095b053SXin Li * of the 6D grid. 937*b095b053SXin Li * @param range_j the number of items to process along the second dimension 938*b095b053SXin Li * of the 6D grid. 939*b095b053SXin Li * @param range_k the number of items to process along the third dimension 940*b095b053SXin Li * of the 6D grid. 941*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 942*b095b053SXin Li * of the 6D grid. 943*b095b053SXin Li * @param range_m the number of items to process along the fifth dimension 944*b095b053SXin Li * of the 6D grid. 945*b095b053SXin Li * @param range_n the number of items to process along the sixth dimension 946*b095b053SXin Li * of the 6D grid. 949*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 950*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 951*b095b053SXin Li */ 952*b095b053SXin Li void pthreadpool_parallelize_6d( 953*b095b053SXin Li pthreadpool_t threadpool, 954*b095b053SXin Li pthreadpool_task_6d_t function, 955*b095b053SXin Li void* context, 956*b095b053SXin Li size_t range_i, 957*b095b053SXin Li size_t range_j, 958*b095b053SXin Li size_t range_k, 959*b095b053SXin Li size_t range_l, 960*b095b053SXin Li size_t range_m, 961*b095b053SXin Li size_t range_n, 962*b095b053SXin Li uint32_t flags); 963*b095b053SXin Li 964*b095b053SXin Li /** 965*b095b053SXin Li * Process items on a 6D grid with the specified maximum tile size along the 966*b095b053SXin Li * last grid dimension. 
967*b095b053SXin Li * 968*b095b053SXin Li * The function implements a parallel version of the following snippet: 969*b095b053SXin Li * 970*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 971*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 972*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 973*b095b053SXin Li * for (size_t l = 0; l < range_l; l++) 974*b095b053SXin Li * for (size_t m = 0; m < range_m; m++) 975*b095b053SXin Li * for (size_t n = 0; n < range_n; n += tile_n) 976*b095b053SXin Li * function(context, i, j, k, l, m, n, min(range_n - n, tile_n)); 977*b095b053SXin Li * 978*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 979*b095b053SXin Li * is ready for a new task. 980*b095b053SXin Li * 981*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 982*b095b053SXin Li * calls are serialized. 983*b095b053SXin Li * 984*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 985*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 986*b095b053SXin Li * @param function the function to call for each tile. 987*b095b053SXin Li * @param context the first argument passed to the specified function. 988*b095b053SXin Li * @param range_i the number of items to process along the first dimension 989*b095b053SXin Li * of the 6D grid. 990*b095b053SXin Li * @param range_j the number of items to process along the second dimension 991*b095b053SXin Li * of the 6D grid. 992*b095b053SXin Li * @param range_k the number of items to process along the third dimension 993*b095b053SXin Li * of the 6D grid. 994*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 995*b095b053SXin Li * of the 6D grid. 996*b095b053SXin Li * @param range_m the number of items to process along the fifth dimension 997*b095b053SXin Li * of the 6D grid. 
998*b095b053SXin Li * @param range_n the number of items to process along the sixth dimension 999*b095b053SXin Li * of the 6D grid. 1000*b095b053SXin Li * @param tile_n the maximum number of items along the sixth dimension of 1001*b095b053SXin Li * the 6D grid to process in one function call. 1002*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 1003*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 1004*b095b053SXin Li */ 1005*b095b053SXin Li void pthreadpool_parallelize_6d_tile_1d( 1006*b095b053SXin Li pthreadpool_t threadpool, 1007*b095b053SXin Li pthreadpool_task_6d_tile_1d_t function, 1008*b095b053SXin Li void* context, 1009*b095b053SXin Li size_t range_i, 1010*b095b053SXin Li size_t range_j, 1011*b095b053SXin Li size_t range_k, 1012*b095b053SXin Li size_t range_l, 1013*b095b053SXin Li size_t range_m, 1014*b095b053SXin Li size_t range_n, 1015*b095b053SXin Li size_t tile_n, 1016*b095b053SXin Li uint32_t flags); 1017*b095b053SXin Li 1018*b095b053SXin Li /** 1019*b095b053SXin Li * Process items on a 6D grid with the specified maximum tile size along the 1020*b095b053SXin Li * last two grid dimensions. 
1021*b095b053SXin Li * 1022*b095b053SXin Li * The function implements a parallel version of the following snippet: 1023*b095b053SXin Li * 1024*b095b053SXin Li * for (size_t i = 0; i < range_i; i++) 1025*b095b053SXin Li * for (size_t j = 0; j < range_j; j++) 1026*b095b053SXin Li * for (size_t k = 0; k < range_k; k++) 1027*b095b053SXin Li * for (size_t l = 0; l < range_l; l++) 1028*b095b053SXin Li * for (size_t m = 0; m < range_m; m += tile_m) 1029*b095b053SXin Li * for (size_t n = 0; n < range_n; n += tile_n) 1030*b095b053SXin Li * function(context, i, j, k, l, m, n, 1031*b095b053SXin Li * min(range_m - m, tile_m), min(range_n - n, tile_n)); 1032*b095b053SXin Li * 1033*b095b053SXin Li * When the function returns, all items have been processed and the thread pool 1034*b095b053SXin Li * is ready for a new task. 1035*b095b053SXin Li * 1036*b095b053SXin Li * @note If multiple threads call this function with the same thread pool, the 1037*b095b053SXin Li * calls are serialized. 1038*b095b053SXin Li * 1039*b095b053SXin Li * @param threadpool the thread pool to use for parallelisation. If threadpool 1040*b095b053SXin Li * is NULL, all items are processed serially on the calling thread. 1041*b095b053SXin Li * @param function the function to call for each tile. 1042*b095b053SXin Li * @param context the first argument passed to the specified function. 1043*b095b053SXin Li * @param range_i the number of items to process along the first dimension 1044*b095b053SXin Li * of the 6D grid. 1045*b095b053SXin Li * @param range_j the number of items to process along the second dimension 1046*b095b053SXin Li * of the 6D grid. 1047*b095b053SXin Li * @param range_k the number of items to process along the third dimension 1048*b095b053SXin Li * of the 6D grid. 1049*b095b053SXin Li * @param range_l the number of items to process along the fourth dimension 1050*b095b053SXin Li * of the 6D grid. 
1051*b095b053SXin Li * @param range_m the number of items to process along the fifth dimension 1052*b095b053SXin Li * of the 6D grid. 1053*b095b053SXin Li * @param range_n the number of items to process along the sixth dimension 1054*b095b053SXin Li * of the 6D grid. 1055*b095b053SXin Li * @param tile_m the maximum number of items along the fifth dimension of 1056*b095b053SXin Li * the 6D grid to process in one function call. 1057*b095b053SXin Li * @param tile_n the maximum number of items along the sixth dimension of 1058*b095b053SXin Li * the 6D grid to process in one function call. 1059*b095b053SXin Li * @param flags a bitwise combination of zero or more optional flags 1060*b095b053SXin Li * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) 1061*b095b053SXin Li */ 1062*b095b053SXin Li void pthreadpool_parallelize_6d_tile_2d( 1063*b095b053SXin Li pthreadpool_t threadpool, 1064*b095b053SXin Li pthreadpool_task_6d_tile_2d_t function, 1065*b095b053SXin Li void* context, 1066*b095b053SXin Li size_t range_i, 1067*b095b053SXin Li size_t range_j, 1068*b095b053SXin Li size_t range_k, 1069*b095b053SXin Li size_t range_l, 1070*b095b053SXin Li size_t range_m, 1071*b095b053SXin Li size_t range_n, 1072*b095b053SXin Li size_t tile_m, 1073*b095b053SXin Li size_t tile_n, 1074*b095b053SXin Li uint32_t flags); 1075*b095b053SXin Li 1076*b095b053SXin Li /** 1077*b095b053SXin Li * Terminates threads in the thread pool and releases associated resources. 1078*b095b053SXin Li * 1079*b095b053SXin Li * @warning Accessing the thread pool after a call to this function constitutes 1080*b095b053SXin Li * undefined behaviour and may cause data corruption. 1081*b095b053SXin Li * 1082*b095b053SXin Li * @param[in,out] threadpool The thread pool to destroy. 
1083*b095b053SXin Li */ 1084*b095b053SXin Li void pthreadpool_destroy(pthreadpool_t threadpool); 1085*b095b053SXin Li 1086*b095b053SXin Li 1087*b095b053SXin Li #ifndef PTHREADPOOL_NO_DEPRECATED_API 1088*b095b053SXin Li 1089*b095b053SXin Li /* Legacy API for compatibility with pre-existing users (e.g. NNPACK). Defining PTHREADPOOL_NO_DEPRECATED_API before this point omits every declaration in this section. PTHREADPOOL_DEPRECATED expands to the GNU deprecated attribute when __GNUC__ is defined and to nothing otherwise, so other compilers get no deprecation warning. */ 1090*b095b053SXin Li #if defined(__GNUC__) 1091*b095b053SXin Li #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__)) 1092*b095b053SXin Li #else 1093*b095b053SXin Li #define PTHREADPOOL_DEPRECATED 1094*b095b053SXin Li #endif 1095*b095b053SXin Li /* Callback types taken as the function parameter of the pthreadpool_compute_* declarations below; all are marked deprecated. */ 1096*b095b053SXin Li typedef void (*pthreadpool_function_1d_t)(void*, size_t) PTHREADPOOL_DEPRECATED; 1097*b095b053SXin Li typedef void (*pthreadpool_function_1d_tiled_t)(void*, size_t, size_t) PTHREADPOOL_DEPRECATED; 1098*b095b053SXin Li typedef void (*pthreadpool_function_2d_t)(void*, size_t, size_t) PTHREADPOOL_DEPRECATED; 1099*b095b053SXin Li typedef void (*pthreadpool_function_2d_tiled_t)(void*, size_t, size_t, size_t, size_t) PTHREADPOOL_DEPRECATED; 1100*b095b053SXin Li typedef void (*pthreadpool_function_3d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t) PTHREADPOOL_DEPRECATED; 1101*b095b053SXin Li typedef void (*pthreadpool_function_4d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t) PTHREADPOOL_DEPRECATED; 1102*b095b053SXin Li /* Legacy entry points, all marked deprecated. NOTE(review): presumably superseded by the pthreadpool_parallelize_* functions declared earlier in this header - confirm the exact replacement for each before migrating callers. */ 1103*b095b053SXin Li void pthreadpool_compute_1d( 1104*b095b053SXin Li pthreadpool_t threadpool, 1105*b095b053SXin Li pthreadpool_function_1d_t function, 1106*b095b053SXin Li void* argument, 1107*b095b053SXin Li size_t range) PTHREADPOOL_DEPRECATED; 1108*b095b053SXin Li 1109*b095b053SXin Li void pthreadpool_compute_1d_tiled( 1110*b095b053SXin Li pthreadpool_t threadpool, 1111*b095b053SXin Li pthreadpool_function_1d_tiled_t function, 1112*b095b053SXin Li void* argument, 1113*b095b053SXin Li size_t range, 1114*b095b053SXin Li size_t tile) PTHREADPOOL_DEPRECATED; 1115*b095b053SXin Li 1116*b095b053SXin Li void pthreadpool_compute_2d( 
1117*b095b053SXin Li pthreadpool_t threadpool, 1118*b095b053SXin Li pthreadpool_function_2d_t function, 1119*b095b053SXin Li void* argument, 1120*b095b053SXin Li size_t range_i, 1121*b095b053SXin Li size_t range_j) PTHREADPOOL_DEPRECATED; 1122*b095b053SXin Li 1123*b095b053SXin Li void pthreadpool_compute_2d_tiled( 1124*b095b053SXin Li pthreadpool_t threadpool, 1125*b095b053SXin Li pthreadpool_function_2d_tiled_t function, 1126*b095b053SXin Li void* argument, 1127*b095b053SXin Li size_t range_i, 1128*b095b053SXin Li size_t range_j, 1129*b095b053SXin Li size_t tile_i, 1130*b095b053SXin Li size_t tile_j) PTHREADPOOL_DEPRECATED; 1131*b095b053SXin Li 1132*b095b053SXin Li void pthreadpool_compute_3d_tiled( 1133*b095b053SXin Li pthreadpool_t threadpool, 1134*b095b053SXin Li pthreadpool_function_3d_tiled_t function, 1135*b095b053SXin Li void* argument, 1136*b095b053SXin Li size_t range_i, 1137*b095b053SXin Li size_t range_j, 1138*b095b053SXin Li size_t range_k, 1139*b095b053SXin Li size_t tile_i, 1140*b095b053SXin Li size_t tile_j, 1141*b095b053SXin Li size_t tile_k) PTHREADPOOL_DEPRECATED; 1142*b095b053SXin Li 1143*b095b053SXin Li void pthreadpool_compute_4d_tiled( 1144*b095b053SXin Li pthreadpool_t threadpool, 1145*b095b053SXin Li pthreadpool_function_4d_tiled_t function, 1146*b095b053SXin Li void* argument, 1147*b095b053SXin Li size_t range_i, 1148*b095b053SXin Li size_t range_j, 1149*b095b053SXin Li size_t range_k, 1150*b095b053SXin Li size_t range_l, 1151*b095b053SXin Li size_t tile_i, 1152*b095b053SXin Li size_t tile_j, 1153*b095b053SXin Li size_t tile_k, 1154*b095b053SXin Li size_t tile_l) PTHREADPOOL_DEPRECATED; 1155*b095b053SXin Li 1156*b095b053SXin Li #endif /* PTHREADPOOL_NO_DEPRECATED_API */ 1157*b095b053SXin Li 1158*b095b053SXin Li #ifdef __cplusplus 1159*b095b053SXin Li } /* extern "C" */ 1160*b095b053SXin Li #endif 1161*b095b053SXin Li 1162*b095b053SXin Li #endif /* PTHREADPOOL_H_ */ 1163