xref: /aosp_15_r20/external/pthreadpool/src/shim.c (revision b095b0533730c2930f947df924a4486d266faa1a)
1*b095b053SXin Li /* Standard C headers */
2*b095b053SXin Li #include <stddef.h>
3*b095b053SXin Li 
4*b095b053SXin Li /* Public library header */
5*b095b053SXin Li #include <pthreadpool.h>
6*b095b053SXin Li 
7*b095b053SXin Li /* Internal library headers */
8*b095b053SXin Li #include "threadpool-utils.h"
9*b095b053SXin Li 
10*b095b053SXin Li 
/*
 * The shim keeps no per-pool state, so the pool type only has to exist as an
 * object whose address pthreadpool_create() can hand out.
 *
 * A struct without members and an empty "{ }" initializer are not valid
 * ISO C11 (they are accepted only as compiler extensions), so a placeholder
 * byte keeps the definition portable. The member is never read or written.
 */
struct pthreadpool {
	char placeholder;
};

/* The one pool object shared by every successful pthreadpool_create() call. */
static const struct pthreadpool static_pthreadpool = { 0 };
15*b095b053SXin Li 
16*b095b053SXin Li 
pthreadpool_create(size_t threads_count)17*b095b053SXin Li struct pthreadpool* pthreadpool_create(size_t threads_count) {
18*b095b053SXin Li 	if (threads_count <= 1) {
19*b095b053SXin Li 		return (struct pthreadpool*) &static_pthreadpool;
20*b095b053SXin Li 	}
21*b095b053SXin Li 
22*b095b053SXin Li 	return NULL;
23*b095b053SXin Li }
24*b095b053SXin Li 
/*
 * Reports the number of threads in the pool. The shim always answers 1;
 * threadpool is not inspected.
 */
size_t pthreadpool_get_threads_count(struct pthreadpool* threadpool) {
	(void) threadpool;
	return (size_t) 1;
}
28*b095b053SXin Li 
/*
 * Serial stand-in for the 1D parallel loop: calls task(argument, i) once for
 * every i in [0, range), in increasing order, directly from this function.
 *
 * threadpool and flags are part of the signature but are not used here.
 */
void pthreadpool_parallelize_1d(
	struct pthreadpool* threadpool,
	pthreadpool_task_1d_t task,
	void* argument,
	size_t range,
	uint32_t flags)
{
	size_t index = 0;
	while (index < range) {
		task(argument, index);
		++index;
	}
}
40*b095b053SXin Li 
/*
 * Serial stand-in for the 1D parallel loop with a uarch index: calls
 * task(argument, default_uarch_index, i) once for every i in [0, range),
 * in increasing order.
 *
 * Every call receives default_uarch_index; threadpool, max_uarch_index and
 * flags are part of the signature but are not used here.
 */
void pthreadpool_parallelize_1d_with_uarch(
	pthreadpool_t threadpool,
	pthreadpool_task_1d_with_id_t task,
	void* argument,
	uint32_t default_uarch_index,
	uint32_t max_uarch_index,
	size_t range,
	uint32_t flags)
{
	size_t index = 0;
	while (index < range) {
		task(argument, default_uarch_index, index);
		++index;
	}
}
54*b095b053SXin Li 
/*
 * Serial stand-in for the tiled 1D parallel loop: walks [0, range) in
 * consecutive chunks of at most `tile` items and calls
 * task(argument, start, count) for each chunk, in increasing order.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile must be non-zero: a zero tile never advances the loop.
 */
void pthreadpool_parallelize_1d_tile_1d(
	pthreadpool_t threadpool,
	pthreadpool_task_1d_tile_1d_t task,
	void* argument,
	size_t range,
	size_t tile,
	uint32_t flags)
{
	size_t start = 0;
	while (start < range) {
		const size_t count = min(range - start, tile);
		task(argument, start, count);
		/*
		 * Advance by the clamped count, not by tile: start + tile can wrap
		 * around size_t on the last chunk, whereas start + count never
		 * exceeds range.
		 */
		start += count;
	}
}
67*b095b053SXin Li 
/*
 * Serial stand-in for the 2D parallel loop: calls task(argument, i, j) for
 * every (i, j) in [0, range_i) x [0, range_j), with j varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 */
void pthreadpool_parallelize_2d(
	struct pthreadpool* threadpool,
	pthreadpool_task_2d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	uint32_t flags)
{
	for (size_t idx_i = 0; idx_i < range_i; ++idx_i) {
		for (size_t idx_j = 0; idx_j < range_j; ++idx_j) {
			task(argument, idx_i, idx_j);
		}
	}
}
82*b095b053SXin Li 
/*
 * Serial stand-in for the 2D parallel loop tiled along j: for every i in
 * [0, range_i), walks [0, range_j) in chunks of at most tile_j items and
 * calls task(argument, i, start_j, count_j) for each chunk.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_j must be non-zero: a zero tile never advances the loop.
 */
void pthreadpool_parallelize_2d_tile_1d(
	pthreadpool_t threadpool,
	pthreadpool_task_2d_tile_1d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t tile_j,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		size_t start_j = 0;
		while (start_j < range_j) {
			const size_t count_j = min(range_j - start_j, tile_j);
			task(argument, i, start_j, count_j);
			/*
			 * Step by the clamped count: start_j + tile_j can wrap around
			 * size_t on the last chunk, start_j + count_j cannot pass range_j.
			 */
			start_j += count_j;
		}
	}
}
98*b095b053SXin Li 
/*
 * Serial stand-in for the 2D parallel loop tiled along both dimensions:
 * covers [0, range_i) x [0, range_j) with tiles of at most tile_i x tile_j
 * items and calls task(argument, start_i, start_j, count_i, count_j) for
 * each tile, with the j tiles varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_i and tile_j must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_2d_tile_2d(
	pthreadpool_t threadpool,
	pthreadpool_task_2d_tile_2d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t tile_i,
	size_t tile_j,
	uint32_t flags)
{
	size_t start_i = 0;
	while (start_i < range_i) {
		const size_t count_i = min(range_i - start_i, tile_i);
		size_t start_j = 0;
		while (start_j < range_j) {
			const size_t count_j = min(range_j - start_j, tile_j);
			task(argument, start_i, start_j, count_i, count_j);
			/*
			 * Step by the clamped count: adding the full tile can wrap
			 * around size_t on the last tile of a dimension.
			 */
			start_j += count_j;
		}
		start_i += count_i;
	}
}
115*b095b053SXin Li 
/*
 * Serial stand-in for the 2D/2D-tiled parallel loop with a uarch index:
 * covers [0, range_i) x [0, range_j) with tiles of at most tile_i x tile_j
 * items and calls
 * task(argument, default_uarch_index, start_i, start_j, count_i, count_j)
 * for each tile, with the j tiles varying fastest.
 *
 * Every call receives default_uarch_index; threadpool, max_uarch_index and
 * flags are part of the signature but are not used here.
 * tile_i and tile_j must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_2d_tile_2d_with_uarch(
	pthreadpool_t threadpool,
	pthreadpool_task_2d_tile_2d_with_id_t task,
	void* argument,
	uint32_t default_uarch_index,
	uint32_t max_uarch_index,
	size_t range_i,
	size_t range_j,
	size_t tile_i,
	size_t tile_j,
	uint32_t flags)
{
	size_t start_i = 0;
	while (start_i < range_i) {
		const size_t count_i = min(range_i - start_i, tile_i);
		size_t start_j = 0;
		while (start_j < range_j) {
			const size_t count_j = min(range_j - start_j, tile_j);
			task(argument, default_uarch_index, start_i, start_j,
				count_i, count_j);
			/*
			 * Step by the clamped count: adding the full tile can wrap
			 * around size_t on the last tile of a dimension.
			 */
			start_j += count_j;
		}
		start_i += count_i;
	}
}
135*b095b053SXin Li 
/*
 * Serial stand-in for the 3D parallel loop: calls task(argument, i, j, k)
 * for every point of [0, range_i) x [0, range_j) x [0, range_k), with the
 * last index varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 */
void pthreadpool_parallelize_3d(
	pthreadpool_t threadpool,
	pthreadpool_task_3d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	uint32_t flags)
{
	for (size_t idx_i = 0; idx_i < range_i; ++idx_i) {
		for (size_t idx_j = 0; idx_j < range_j; ++idx_j) {
			for (size_t idx_k = 0; idx_k < range_k; ++idx_k) {
				task(argument, idx_i, idx_j, idx_k);
			}
		}
	}
}
153*b095b053SXin Li 
/*
 * Serial stand-in for the 3D parallel loop tiled along k: for every (i, j)
 * in [0, range_i) x [0, range_j), walks [0, range_k) in chunks of at most
 * tile_k items and calls task(argument, i, j, start_k, count_k) per chunk.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_k must be non-zero: a zero tile never advances the loop.
 */
void pthreadpool_parallelize_3d_tile_1d(
	pthreadpool_t threadpool,
	pthreadpool_task_3d_tile_1d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t tile_k,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			size_t start_k = 0;
			while (start_k < range_k) {
				const size_t count_k = min(range_k - start_k, tile_k);
				task(argument, i, j, start_k, count_k);
				/*
				 * Step by the clamped count: start_k + tile_k can wrap
				 * around size_t on the last chunk.
				 */
				start_k += count_k;
			}
		}
	}
}
172*b095b053SXin Li 
/*
 * Serial stand-in for the 3D parallel loop tiled along j and k: for every i
 * in [0, range_i), covers [0, range_j) x [0, range_k) with tiles of at most
 * tile_j x tile_k items and calls
 * task(argument, i, start_j, start_k, count_j, count_k) for each tile, with
 * the k tiles varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_j and tile_k must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_3d_tile_2d(
	pthreadpool_t threadpool,
	pthreadpool_task_3d_tile_2d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t tile_j,
	size_t tile_k,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		size_t start_j = 0;
		while (start_j < range_j) {
			const size_t count_j = min(range_j - start_j, tile_j);
			size_t start_k = 0;
			while (start_k < range_k) {
				const size_t count_k = min(range_k - start_k, tile_k);
				task(argument, i, start_j, start_k, count_j, count_k);
				/*
				 * Step by the clamped count: adding the full tile can wrap
				 * around size_t on the last tile of a dimension.
				 */
				start_k += count_k;
			}
			start_j += count_j;
		}
	}
}
193*b095b053SXin Li 
/*
 * Serial stand-in for the 3D/2D-tiled parallel loop with a uarch index: for
 * every i in [0, range_i), covers [0, range_j) x [0, range_k) with tiles of
 * at most tile_j x tile_k items and calls
 * task(argument, default_uarch_index, i, start_j, start_k, count_j, count_k)
 * for each tile, with the k tiles varying fastest.
 *
 * Every call receives default_uarch_index; threadpool, max_uarch_index and
 * flags are part of the signature but are not used here.
 * tile_j and tile_k must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_3d_tile_2d_with_uarch(
	pthreadpool_t threadpool,
	pthreadpool_task_3d_tile_2d_with_id_t task,
	void* argument,
	uint32_t default_uarch_index,
	uint32_t max_uarch_index,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t tile_j,
	size_t tile_k,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		size_t start_j = 0;
		while (start_j < range_j) {
			const size_t count_j = min(range_j - start_j, tile_j);
			size_t start_k = 0;
			while (start_k < range_k) {
				const size_t count_k = min(range_k - start_k, tile_k);
				task(argument, default_uarch_index, i, start_j, start_k,
					count_j, count_k);
				/*
				 * Step by the clamped count: adding the full tile can wrap
				 * around size_t on the last tile of a dimension.
				 */
				start_k += count_k;
			}
			start_j += count_j;
		}
	}
}
216*b095b053SXin Li 
/*
 * Serial stand-in for the 4D parallel loop: calls task(argument, i, j, k, l)
 * for every point of [0, range_i) x [0, range_j) x [0, range_k) x
 * [0, range_l), with the last index varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 */
void pthreadpool_parallelize_4d(
	pthreadpool_t threadpool,
	pthreadpool_task_4d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	uint32_t flags)
{
	for (size_t idx_i = 0; idx_i < range_i; ++idx_i) {
		for (size_t idx_j = 0; idx_j < range_j; ++idx_j) {
			for (size_t idx_k = 0; idx_k < range_k; ++idx_k) {
				for (size_t idx_l = 0; idx_l < range_l; ++idx_l) {
					task(argument, idx_i, idx_j, idx_k, idx_l);
				}
			}
		}
	}
}
237*b095b053SXin Li 
/*
 * Serial stand-in for the 4D parallel loop tiled along l: for every
 * (i, j, k) in [0, range_i) x [0, range_j) x [0, range_k), walks
 * [0, range_l) in chunks of at most tile_l items and calls
 * task(argument, i, j, k, start_l, count_l) for each chunk.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_l must be non-zero: a zero tile never advances the loop.
 */
void pthreadpool_parallelize_4d_tile_1d(
	pthreadpool_t threadpool,
	pthreadpool_task_4d_tile_1d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t tile_l,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			for (size_t k = 0; k < range_k; k++) {
				size_t start_l = 0;
				while (start_l < range_l) {
					const size_t count_l = min(range_l - start_l, tile_l);
					task(argument, i, j, k, start_l, count_l);
					/*
					 * Step by the clamped count: start_l + tile_l can wrap
					 * around size_t on the last chunk.
					 */
					start_l += count_l;
				}
			}
		}
	}
}
259*b095b053SXin Li 
/*
 * Serial stand-in for the 4D parallel loop tiled along k and l: for every
 * (i, j) in [0, range_i) x [0, range_j), covers [0, range_k) x [0, range_l)
 * with tiles of at most tile_k x tile_l items and calls
 * task(argument, i, j, start_k, start_l, count_k, count_l) for each tile,
 * with the l tiles varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_k and tile_l must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_4d_tile_2d(
	pthreadpool_t threadpool,
	pthreadpool_task_4d_tile_2d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t tile_k,
	size_t tile_l,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			size_t start_k = 0;
			while (start_k < range_k) {
				const size_t count_k = min(range_k - start_k, tile_k);
				size_t start_l = 0;
				while (start_l < range_l) {
					const size_t count_l = min(range_l - start_l, tile_l);
					task(argument, i, j, start_k, start_l, count_k, count_l);
					/*
					 * Step by the clamped count: adding the full tile can
					 * wrap around size_t on the last tile of a dimension.
					 */
					start_l += count_l;
				}
				start_k += count_k;
			}
		}
	}
}
283*b095b053SXin Li 
/*
 * Serial stand-in for the 4D/2D-tiled parallel loop with a uarch index: for
 * every (i, j) in [0, range_i) x [0, range_j), covers
 * [0, range_k) x [0, range_l) with tiles of at most tile_k x tile_l items
 * and calls
 * task(argument, default_uarch_index, i, j, start_k, start_l, count_k, count_l)
 * for each tile, with the l tiles varying fastest.
 *
 * Every call receives default_uarch_index; threadpool, max_uarch_index and
 * flags are part of the signature but are not used here.
 * tile_k and tile_l must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_4d_tile_2d_with_uarch(
	pthreadpool_t threadpool,
	pthreadpool_task_4d_tile_2d_with_id_t task,
	void* argument,
	uint32_t default_uarch_index,
	uint32_t max_uarch_index,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t tile_k,
	size_t tile_l,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			size_t start_k = 0;
			while (start_k < range_k) {
				const size_t count_k = min(range_k - start_k, tile_k);
				size_t start_l = 0;
				while (start_l < range_l) {
					const size_t count_l = min(range_l - start_l, tile_l);
					task(argument, default_uarch_index, i, j, start_k, start_l,
						count_k, count_l);
					/*
					 * Step by the clamped count: adding the full tile can
					 * wrap around size_t on the last tile of a dimension.
					 */
					start_l += count_l;
				}
				start_k += count_k;
			}
		}
	}
}
309*b095b053SXin Li 
/*
 * Serial stand-in for the 5D parallel loop: calls
 * task(argument, i, j, k, l, m) for every point of the 5D index space
 * bounded by range_i .. range_m, with the last index varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 */
void pthreadpool_parallelize_5d(
	pthreadpool_t threadpool,
	pthreadpool_task_5d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t range_m,
	uint32_t flags)
{
	for (size_t idx_i = 0; idx_i < range_i; ++idx_i) {
		for (size_t idx_j = 0; idx_j < range_j; ++idx_j) {
			for (size_t idx_k = 0; idx_k < range_k; ++idx_k) {
				for (size_t idx_l = 0; idx_l < range_l; ++idx_l) {
					for (size_t idx_m = 0; idx_m < range_m; ++idx_m) {
						task(argument, idx_i, idx_j, idx_k, idx_l, idx_m);
					}
				}
			}
		}
	}
}
333*b095b053SXin Li 
/*
 * Serial stand-in for the 5D parallel loop tiled along m: for every
 * (i, j, k, l) in the space bounded by range_i .. range_l, walks
 * [0, range_m) in chunks of at most tile_m items and calls
 * task(argument, i, j, k, l, start_m, count_m) for each chunk.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_m must be non-zero: a zero tile never advances the loop.
 */
void pthreadpool_parallelize_5d_tile_1d(
	pthreadpool_t threadpool,
	pthreadpool_task_5d_tile_1d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t range_m,
	size_t tile_m,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			for (size_t k = 0; k < range_k; k++) {
				for (size_t l = 0; l < range_l; l++) {
					size_t start_m = 0;
					while (start_m < range_m) {
						const size_t count_m = min(range_m - start_m, tile_m);
						task(argument, i, j, k, l, start_m, count_m);
						/*
						 * Step by the clamped count: start_m + tile_m can
						 * wrap around size_t on the last chunk.
						 */
						start_m += count_m;
					}
				}
			}
		}
	}
}
358*b095b053SXin Li 
/*
 * Serial stand-in for the 5D parallel loop tiled along l and m: for every
 * (i, j, k) in the space bounded by range_i .. range_k, covers
 * [0, range_l) x [0, range_m) with tiles of at most tile_l x tile_m items
 * and calls task(argument, i, j, k, start_l, start_m, count_l, count_m) for
 * each tile, with the m tiles varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_l and tile_m must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_5d_tile_2d(
	pthreadpool_t threadpool,
	pthreadpool_task_5d_tile_2d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t range_m,
	size_t tile_l,
	size_t tile_m,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			for (size_t k = 0; k < range_k; k++) {
				size_t start_l = 0;
				while (start_l < range_l) {
					const size_t count_l = min(range_l - start_l, tile_l);
					size_t start_m = 0;
					while (start_m < range_m) {
						const size_t count_m = min(range_m - start_m, tile_m);
						task(argument, i, j, k, start_l, start_m,
							count_l, count_m);
						/*
						 * Step by the clamped count: adding the full tile can
						 * wrap around size_t on the last tile of a dimension.
						 */
						start_m += count_m;
					}
					start_l += count_l;
				}
			}
		}
	}
}
385*b095b053SXin Li 
/*
 * Serial stand-in for the 6D parallel loop: calls
 * task(argument, i, j, k, l, m, n) for every point of the 6D index space
 * bounded by range_i .. range_n, with the last index varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 */
void pthreadpool_parallelize_6d(
	pthreadpool_t threadpool,
	pthreadpool_task_6d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t range_m,
	size_t range_n,
	uint32_t flags)
{
	for (size_t idx_i = 0; idx_i < range_i; ++idx_i) {
		for (size_t idx_j = 0; idx_j < range_j; ++idx_j) {
			for (size_t idx_k = 0; idx_k < range_k; ++idx_k) {
				for (size_t idx_l = 0; idx_l < range_l; ++idx_l) {
					for (size_t idx_m = 0; idx_m < range_m; ++idx_m) {
						for (size_t idx_n = 0; idx_n < range_n; ++idx_n) {
							task(argument, idx_i, idx_j, idx_k, idx_l, idx_m, idx_n);
						}
					}
				}
			}
		}
	}
}
412*b095b053SXin Li 
/*
 * Serial stand-in for the 6D parallel loop tiled along n: for every
 * (i, j, k, l, m) in the space bounded by range_i .. range_m, walks
 * [0, range_n) in chunks of at most tile_n items and calls
 * task(argument, i, j, k, l, m, start_n, count_n) for each chunk.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_n must be non-zero: a zero tile never advances the loop.
 */
void pthreadpool_parallelize_6d_tile_1d(
	pthreadpool_t threadpool,
	pthreadpool_task_6d_tile_1d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t range_m,
	size_t range_n,
	size_t tile_n,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			for (size_t k = 0; k < range_k; k++) {
				for (size_t l = 0; l < range_l; l++) {
					for (size_t m = 0; m < range_m; m++) {
						size_t start_n = 0;
						while (start_n < range_n) {
							const size_t count_n = min(range_n - start_n, tile_n);
							task(argument, i, j, k, l, m, start_n, count_n);
							/*
							 * Step by the clamped count: start_n + tile_n can
							 * wrap around size_t on the last chunk.
							 */
							start_n += count_n;
						}
					}
				}
			}
		}
	}
}
440*b095b053SXin Li 
/*
 * Serial stand-in for the 6D parallel loop tiled along m and n: for every
 * (i, j, k, l) in the space bounded by range_i .. range_l, covers
 * [0, range_m) x [0, range_n) with tiles of at most tile_m x tile_n items
 * and calls
 * task(argument, i, j, k, l, start_m, start_n, count_m, count_n) for each
 * tile, with the n tiles varying fastest.
 *
 * threadpool and flags are part of the signature but are not used here.
 * tile_m and tile_n must be non-zero: a zero tile never advances its loop.
 */
void pthreadpool_parallelize_6d_tile_2d(
	pthreadpool_t threadpool,
	pthreadpool_task_6d_tile_2d_t task,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t range_m,
	size_t range_n,
	size_t tile_m,
	size_t tile_n,
	uint32_t flags)
{
	for (size_t i = 0; i < range_i; i++) {
		for (size_t j = 0; j < range_j; j++) {
			for (size_t k = 0; k < range_k; k++) {
				for (size_t l = 0; l < range_l; l++) {
					size_t start_m = 0;
					while (start_m < range_m) {
						const size_t count_m = min(range_m - start_m, tile_m);
						size_t start_n = 0;
						while (start_n < range_n) {
							const size_t count_n = min(range_n - start_n, tile_n);
							task(argument, i, j, k, l, start_m, start_n,
								count_m, count_n);
							/*
							 * Step by the clamped count: adding the full tile
							 * can wrap around size_t on the last tile of a
							 * dimension.
							 */
							start_n += count_n;
						}
						start_m += count_m;
					}
				}
			}
		}
	}
}
470*b095b053SXin Li 
/*
 * Releases a pool. This function does nothing and does not touch threadpool.
 */
void pthreadpool_destroy(struct pthreadpool* threadpool) {
	(void) threadpool;
}
473