xref: /aosp_15_r20/external/pthreadpool/src/legacy-api.c (revision b095b0533730c2930f947df924a4486d266faa1a)
1*b095b053SXin Li /* Standard C headers */
2*b095b053SXin Li #include <stddef.h>
3*b095b053SXin Li 
4*b095b053SXin Li /* Dependencies */
5*b095b053SXin Li #include <fxdiv.h>
6*b095b053SXin Li 
7*b095b053SXin Li /* Public library header */
8*b095b053SXin Li #include <pthreadpool.h>
9*b095b053SXin Li 
10*b095b053SXin Li /* Internal library headers */
11*b095b053SXin Li #include "threadpool-utils.h"
12*b095b053SXin Li 
13*b095b053SXin Li 
/*
 * Legacy-API wrapper for a 1D parallel loop.
 *
 * Forwards to pthreadpool_parallelize_1d() with no flags set.
 *
 * threadpool - thread pool handle, passed through unchanged.
 * function   - callback to invoke; cast to the task type expected by the
 *              forwarded call (the cast relies on the two function-pointer
 *              types being call-compatible - confirm in pthreadpool.h).
 * argument   - opaque pointer passed through to the callback.
 * range      - number of items in the 1D iteration space.
 */
void pthreadpool_compute_1d(
	pthreadpool_t threadpool,
	pthreadpool_function_1d_t function,
	void* argument,
	size_t range)
{
	const pthreadpool_task_1d_t task = (pthreadpool_task_1d_t) function;

	pthreadpool_parallelize_1d(threadpool, task, argument, range, 0 /* flags */);
}
24*b095b053SXin Li 
/*
 * Legacy-API wrapper for a tiled 1D parallel loop.
 *
 * Forwards to pthreadpool_parallelize_1d_tile_1d() with no flags set.
 *
 * threadpool - thread pool handle, passed through unchanged.
 * function   - callback to invoke; cast to the task type expected by the
 *              forwarded call (the cast relies on the two function-pointer
 *              types being call-compatible - confirm in pthreadpool.h).
 * argument   - opaque pointer passed through to the callback.
 * range      - number of items in the 1D iteration space.
 * tile       - tile size along the single dimension.
 */
void pthreadpool_compute_1d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_1d_tiled_t function,
	void* argument,
	size_t range,
	size_t tile)
{
	const pthreadpool_task_1d_tile_1d_t task = (pthreadpool_task_1d_tile_1d_t) function;

	pthreadpool_parallelize_1d_tile_1d(threadpool, task, argument, range, tile, 0 /* flags */);
}
36*b095b053SXin Li 
/*
 * Legacy-API wrapper for a 2D parallel loop.
 *
 * Forwards to pthreadpool_parallelize_2d() with no flags set.
 *
 * threadpool - thread pool handle, passed through unchanged.
 * function   - callback to invoke; cast to the task type expected by the
 *              forwarded call (the cast relies on the two function-pointer
 *              types being call-compatible - confirm in pthreadpool.h).
 * argument   - opaque pointer passed through to the callback.
 * range_i    - extent of the first dimension.
 * range_j    - extent of the second dimension.
 */
void pthreadpool_compute_2d(
	pthreadpool_t threadpool,
	pthreadpool_function_2d_t function,
	void* argument,
	size_t range_i,
	size_t range_j)
{
	const pthreadpool_task_2d_t task = (pthreadpool_task_2d_t) function;

	pthreadpool_parallelize_2d(threadpool, task, argument, range_i, range_j, 0 /* flags */);
}
48*b095b053SXin Li 
/*
 * Legacy-API wrapper for a tiled 2D parallel loop.
 *
 * Forwards to pthreadpool_parallelize_2d_tile_2d() with no flags set.
 *
 * threadpool       - thread pool handle, passed through unchanged.
 * function         - callback to invoke; cast to the task type expected by
 *                    the forwarded call (the cast relies on the two
 *                    function-pointer types being call-compatible - confirm
 *                    in pthreadpool.h).
 * argument         - opaque pointer passed through to the callback.
 * range_i, range_j - extents of the two dimensions.
 * tile_i, tile_j   - tile sizes along the two dimensions.
 */
void pthreadpool_compute_2d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_2d_tiled_t function,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t tile_i,
	size_t tile_j)
{
	const pthreadpool_task_2d_tile_2d_t task = (pthreadpool_task_2d_tile_2d_t) function;

	pthreadpool_parallelize_2d_tile_2d(
		threadpool, task, argument,
		range_i, range_j,
		tile_i, tile_j,
		0 /* flags */);
}
62*b095b053SXin Li 
63*b095b053SXin Li struct compute_3d_tiled_context {
64*b095b053SXin Li 	pthreadpool_function_3d_tiled_t function;
65*b095b053SXin Li 	void* argument;
66*b095b053SXin Li 	struct fxdiv_divisor_size_t tile_range_j;
67*b095b053SXin Li 	struct fxdiv_divisor_size_t tile_range_k;
68*b095b053SXin Li 	size_t range_i;
69*b095b053SXin Li 	size_t range_j;
70*b095b053SXin Li 	size_t range_k;
71*b095b053SXin Li 	size_t tile_i;
72*b095b053SXin Li 	size_t tile_j;
73*b095b053SXin Li 	size_t tile_k;
74*b095b053SXin Li };
75*b095b053SXin Li 
compute_3d_tiled(const struct compute_3d_tiled_context * context,size_t linear_index)76*b095b053SXin Li static void compute_3d_tiled(const struct compute_3d_tiled_context* context, size_t linear_index) {
77*b095b053SXin Li 	const struct fxdiv_divisor_size_t tile_range_k = context->tile_range_k;
78*b095b053SXin Li 	const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k);
79*b095b053SXin Li 	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
80*b095b053SXin Li 	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j);
81*b095b053SXin Li 	const size_t max_tile_i = context->tile_i;
82*b095b053SXin Li 	const size_t max_tile_j = context->tile_j;
83*b095b053SXin Li 	const size_t max_tile_k = context->tile_k;
84*b095b053SXin Li 	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
85*b095b053SXin Li 	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
86*b095b053SXin Li 	const size_t index_k = tile_index_ij_k.remainder * max_tile_k;
87*b095b053SXin Li 	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
88*b095b053SXin Li 	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
89*b095b053SXin Li 	const size_t tile_k = min(max_tile_k, context->range_k - index_k);
90*b095b053SXin Li 	context->function(context->argument, index_i, index_j, index_k, tile_i, tile_j, tile_k);
91*b095b053SXin Li }
92*b095b053SXin Li 
/*
 * Legacy-API entry point for a tiled 3D parallel loop.
 *
 * Calls function once per tile with the tile's starting offsets and its
 * actual extents (the last tile along a dimension is clamped to the range).
 * When the pool reports at most one thread the tiles are processed in order
 * on the calling thread; otherwise the tile grid is flattened and dispatched
 * through pthreadpool_parallelize_1d().
 *
 * threadpool                - thread pool handle.
 * function                  - callback invoked as
 *                             function(argument, i, j, k, tile_i, tile_j, tile_k).
 * argument                  - opaque pointer passed through to the callback.
 * range_i, range_j, range_k - extents of the three dimensions; if any is
 *                             zero the call returns without invoking function.
 * tile_i, tile_j, tile_k    - nominal tile sizes; must be non-zero (the
 *                             sequential loops advance by the tile size and
 *                             the parallel path divides by it).
 */
void pthreadpool_compute_3d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_3d_tiled_t function,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t tile_i,
	size_t tile_j,
	size_t tile_k)
{
	if (range_i == 0 || range_j == 0 || range_k == 0) {
		/*
		 * Empty iteration space: nothing to do. Returning here keeps the
		 * parallel path from building an fxdiv divisor out of a zero tile
		 * count; the sequential path already performs no calls in this case.
		 */
		return;
	}

	if (pthreadpool_get_threads_count(threadpool) <= 1) {
		/* No thread pool used: execute function sequentially on the calling thread */
		for (size_t i = 0; i < range_i; i += tile_i) {
			for (size_t j = 0; j < range_j; j += tile_j) {
				for (size_t k = 0; k < range_k; k += tile_k) {
					function(argument, i, j, k, min(range_i - i, tile_i), min(range_j - j, tile_j), min(range_k - k, tile_k));
				}
			}
		}
	} else {
		/* Execute in parallel on the thread pool using linearized index */
		const size_t tile_range_i = divide_round_up(range_i, tile_i);
		const size_t tile_range_j = divide_round_up(range_j, tile_j);
		const size_t tile_range_k = divide_round_up(range_k, tile_k);
		/* The context lives on this stack frame and is handed to the tasks by pointer */
		struct compute_3d_tiled_context context = {
			.function = function,
			.argument = argument,
			.tile_range_j = fxdiv_init_size_t(tile_range_j),
			.tile_range_k = fxdiv_init_size_t(tile_range_k),
			.range_i = range_i,
			.range_j = range_j,
			.range_k = range_k,
			.tile_i = tile_i,
			.tile_j = tile_j,
			.tile_k = tile_k
		};
		pthreadpool_parallelize_1d(threadpool,
			(pthreadpool_task_1d_t) compute_3d_tiled, &context,
			tile_range_i * tile_range_j * tile_range_k,
			0 /* flags */);
	}
}
136*b095b053SXin Li 
137*b095b053SXin Li struct compute_4d_tiled_context {
138*b095b053SXin Li 	pthreadpool_function_4d_tiled_t function;
139*b095b053SXin Li 	void* argument;
140*b095b053SXin Li 	struct fxdiv_divisor_size_t tile_range_kl;
141*b095b053SXin Li 	struct fxdiv_divisor_size_t tile_range_j;
142*b095b053SXin Li 	struct fxdiv_divisor_size_t tile_range_l;
143*b095b053SXin Li 	size_t range_i;
144*b095b053SXin Li 	size_t range_j;
145*b095b053SXin Li 	size_t range_k;
146*b095b053SXin Li 	size_t range_l;
147*b095b053SXin Li 	size_t tile_i;
148*b095b053SXin Li 	size_t tile_j;
149*b095b053SXin Li 	size_t tile_k;
150*b095b053SXin Li 	size_t tile_l;
151*b095b053SXin Li };
152*b095b053SXin Li 
compute_4d_tiled(const struct compute_4d_tiled_context * context,size_t linear_index)153*b095b053SXin Li static void compute_4d_tiled(const struct compute_4d_tiled_context* context, size_t linear_index) {
154*b095b053SXin Li 	const struct fxdiv_divisor_size_t tile_range_kl = context->tile_range_kl;
155*b095b053SXin Li 	const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl);
156*b095b053SXin Li 	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
157*b095b053SXin Li 	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, tile_range_j);
158*b095b053SXin Li 	const struct fxdiv_divisor_size_t tile_range_l = context->tile_range_l;
159*b095b053SXin Li 	const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l);
160*b095b053SXin Li 	const size_t max_tile_i = context->tile_i;
161*b095b053SXin Li 	const size_t max_tile_j = context->tile_j;
162*b095b053SXin Li 	const size_t max_tile_k = context->tile_k;
163*b095b053SXin Li 	const size_t max_tile_l = context->tile_l;
164*b095b053SXin Li 	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
165*b095b053SXin Li 	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
166*b095b053SXin Li 	const size_t index_k = tile_index_k_l.quotient * max_tile_k;
167*b095b053SXin Li 	const size_t index_l = tile_index_k_l.remainder * max_tile_l;
168*b095b053SXin Li 	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
169*b095b053SXin Li 	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
170*b095b053SXin Li 	const size_t tile_k = min(max_tile_k, context->range_k - index_k);
171*b095b053SXin Li 	const size_t tile_l = min(max_tile_l, context->range_l - index_l);
172*b095b053SXin Li 	context->function(context->argument, index_i, index_j, index_k, index_l, tile_i, tile_j, tile_k, tile_l);
173*b095b053SXin Li }
174*b095b053SXin Li 
/*
 * Legacy-API entry point for a tiled 4D parallel loop.
 *
 * Calls function once per tile with the tile's starting offsets and its
 * actual extents (the last tile along a dimension is clamped to the range).
 * When the pool reports at most one thread the tiles are processed in order
 * on the calling thread; otherwise the tile grid is flattened and dispatched
 * through pthreadpool_parallelize_1d().
 *
 * threadpool      - thread pool handle.
 * function        - callback invoked as
 *                   function(argument, i, j, k, l, tile_i, tile_j, tile_k, tile_l).
 * argument        - opaque pointer passed through to the callback.
 * range_i..range_l - extents of the four dimensions; if any is zero the
 *                   call returns without invoking function.
 * tile_i..tile_l  - nominal tile sizes; must be non-zero (the sequential
 *                   loops advance by the tile size and the parallel path
 *                   divides by it).
 */
void pthreadpool_compute_4d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_4d_tiled_t function,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t tile_i,
	size_t tile_j,
	size_t tile_k,
	size_t tile_l)
{
	if (range_i == 0 || range_j == 0 || range_k == 0 || range_l == 0) {
		/*
		 * Empty iteration space: nothing to do. Returning here keeps the
		 * parallel path from building an fxdiv divisor out of a zero tile
		 * count; the sequential path already performs no calls in this case.
		 */
		return;
	}

	if (pthreadpool_get_threads_count(threadpool) <= 1) {
		/* No thread pool used: execute function sequentially on the calling thread */
		for (size_t i = 0; i < range_i; i += tile_i) {
			for (size_t j = 0; j < range_j; j += tile_j) {
				for (size_t k = 0; k < range_k; k += tile_k) {
					for (size_t l = 0; l < range_l; l += tile_l) {
						function(argument, i, j, k, l,
							min(range_i - i, tile_i), min(range_j - j, tile_j), min(range_k - k, tile_k), min(range_l - l, tile_l));
					}
				}
			}
		}
	} else {
		/* Execute in parallel on the thread pool using linearized index */
		const size_t tile_range_i = divide_round_up(range_i, tile_i);
		const size_t tile_range_j = divide_round_up(range_j, tile_j);
		const size_t tile_range_k = divide_round_up(range_k, tile_k);
		const size_t tile_range_l = divide_round_up(range_l, tile_l);
		/* The context lives on this stack frame and is handed to the tasks by pointer */
		struct compute_4d_tiled_context context = {
			.function = function,
			.argument = argument,
			.tile_range_kl = fxdiv_init_size_t(tile_range_k * tile_range_l),
			.tile_range_j = fxdiv_init_size_t(tile_range_j),
			.tile_range_l = fxdiv_init_size_t(tile_range_l),
			.range_i = range_i,
			.range_j = range_j,
			.range_k = range_k,
			.range_l = range_l,
			.tile_i = tile_i,
			.tile_j = tile_j,
			.tile_k = tile_k,
			.tile_l = tile_l
		};
		pthreadpool_parallelize_1d(threadpool,
			(pthreadpool_task_1d_t) compute_4d_tiled, &context,
			tile_range_i * tile_range_j * tile_range_k * tile_range_l,
			0 /* flags */);
	}
}
227