1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22
23
// Shared constructor for every NHWC resize-bilinear operator variant.
// Validates the arguments, allocates a zero-initialized operator structure,
// and records the channel/stride configuration. The concrete datatype is
// identified by operator_type and gated by datatype_init_flags.
static enum xnn_status create_resize_bilinear2d_nhwc(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  uint32_t datatype_init_flags,
  enum xnn_operator_type operator_type,
  xnn_operator_t* resize_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status result = xnn_status_uninitialized;

  // XNNPACK must be initialized before any operator can be created.
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  result = xnn_status_unsupported_hardware;
  // Microkernels for the requested datatype must be available on this build.
  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  result = xnn_status_invalid_parameter;
  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  // Pixel strides are expressed in elements and must span all channels.
  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto error;
  }

  result = xnn_status_out_of_memory;
  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;
  op->type = operator_type;
  op->flags = flags;
  // Shapes are unknown until setup is called; the operator cannot run yet.
  op->state = xnn_run_state_invalid;

  *resize_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return result;
}
101
// Creates a half-precision (FP16) NHWC resize-bilinear operator.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f16(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_F16, xnn_operator_type_resize_bilinear_nhwc_f16,
    resize_op_out);
}
118
// Creates a single-precision (FP32) NHWC resize-bilinear operator.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_F32, xnn_operator_type_resize_bilinear_nhwc_f32,
    resize_op_out);
}
135
// Creates a signed 8-bit (S8) NHWC resize-bilinear operator.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_S8, xnn_operator_type_resize_bilinear_nhwc_s8,
    resize_op_out);
}
152
// Creates an unsigned 8-bit (U8) NHWC resize-bilinear operator.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_U8, xnn_operator_type_resize_bilinear_nhwc_u8,
    resize_op_out);
}
169
// Shared setup for every NHWC resize-bilinear operator variant.
//
// Validates the shapes, (re)allocates the indirection buffer and packed
// weights when the number of output pixels changes, regenerates them when
// any input/output dimension or the input pointer geometry changes, and
// fills in the compute context so the operator is ready to run. The
// last_input*/last_output* fields cache the previous setup so a repeated
// setup with identical geometry skips the indirection-buffer rebuild.
//
// log2_element_size        - log2 of the tensor element size in bytes.
// log2_weight_element_size - log2 of the packed interpolation-weight element
//                            size in bytes.
// indirection_init         - datatype-specific routine that fills the
//                            indirection buffer and packed weights.
// ibilinear                - microkernel parameters for this datatype.
// num_threads              - thread count used to size the parallelization tile.
static enum xnn_status setup_resize_bilinear2d_nhwc(
  xnn_operator_t resize_op,
  enum xnn_operator_type expected_operator_type,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  void* output,
  uint32_t log2_element_size,
  uint32_t log2_weight_element_size,
  xnn_indirection_init_resize_bilinear2d_hwc_fn indirection_init,
  const struct ibilinear_parameters ibilinear[restrict XNN_MIN_ELEMENTS(1)],
  size_t num_threads)
{
  if (resize_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_invalid_parameter;
  }
  // Mark invalid up front so a failed setup leaves the operator unrunnable.
  resize_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // Dimensions must fit below 2**24; larger extents are rejected (presumably
  // to keep interpolation coordinates exactly representable — TODO confirm).
  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  // An empty batch is a successful no-op: the run is skipped entirely.
  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Buffer sizes depend only on the output pixel count, so reallocation is
  // needed only when that count changes (4 input pointers and 2 packed
  // weights per output pixel).
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    const size_t packed_weights_size = (output_height * output_width * 2) << log2_weight_element_size;

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator indirection buffer",
        indirection_buffer_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // Note: packed weights must be SIMD-aligned, so we can't use xnn_reallocate_memory
    xnn_release_simd_memory(resize_op->packed_weights.pointer);
    resize_op->packed_weights.pointer = xnn_allocate_simd_memory(packed_weights_size);
    if (resize_op->packed_weights.pointer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator packed weights",
        packed_weights_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
  }

  const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride << log2_element_size;
  // Buffer contents depend on the actual dimensions (not just the pixel
  // count), so regenerate whenever any of the four extents changed.
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    const uint32_t flags = resize_op->flags;
    indirection_init(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights.pointer,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    // Remember the input pointer the indirection buffer was built against;
    // later setups with the same geometry only adjust input_offset below.
    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride << log2_element_size;
  // Resize bilinear packed weights can change when the operator is resized, we will not use weights cache.
  assert(resize_op->weights_cache == NULL);
  resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
    .scaled_channels = resize_op->channels << log2_element_size,
    .indirect_input = resize_op->indirection_buffer,
    // Byte delta between the current input and the pointer the indirection
    // buffer was generated from; lets the input move without a rebuild.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
    .packed_weights = resize_op->packed_weights.pointer,
    .output = output,
    .output_pixel_stride = output_pixel_stride_in_bytes,
    .output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
    .log2_wsize = 1 + log2_weight_element_size /* log2(2 * sizeof(weight)) */,
    .ukernel = ibilinear->ukernel,
  };

  const size_t output_size = output_height * output_width;
#if XNN_TEST_MODE
  // In test mode always use the microkernel's native pixel tile.
  const size_t output_size_tile = ibilinear->pixel_tile;
#else
  // Pick a tile that yields roughly target_tiles_per_thread work items per
  // thread, rounded to a multiple of the microkernel's pixel tile.
  size_t output_size_tile = output_size;
  if (num_threads > 1) {
    const size_t target_tiles_per_thread = 5;
    const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
    if (max_output_size_tile < output_size_tile) {
      const uint32_t output_size_subtile = ibilinear->pixel_tile;
      output_size_tile =
        min(output_size_tile,
          divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
    }
  }
#endif
  // Parallelize over (batch, output pixels) with a 1-D tile on the pixels.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = output_size;
  resize_op->compute.tile[0] = output_size_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
320
// Sets up an FP16 NHWC resize-bilinear operator for the given shapes.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f16(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_f16,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    /*log2_element_size=*/1,         // log2(sizeof(uint16_t))
    /*log2_weight_element_size=*/1,  // log2(sizeof(uint16_t))
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_f16,
    &xnn_params.f16.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
348
// Sets up an FP32 NHWC resize-bilinear operator for the given shapes.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool)
{
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_f32,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    /*log2_element_size=*/2,         // log2(sizeof(float))
    /*log2_weight_element_size=*/2,  // log2(sizeof(float))
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_f32,
    &xnn_params.f32.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
376
// Sets up an S8 NHWC resize-bilinear operator for the given shapes.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool)
{
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_s8,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    /*log2_element_size=*/0,         // log2(sizeof(int8_t))
    /*log2_weight_element_size=*/1,  // log2(sizeof(int16_t))
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.s8.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
404
// Sets up a U8 NHWC resize-bilinear operator for the given shapes.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
  xnn_operator_t resize_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool)
{
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_u8,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    /*log2_element_size=*/0,         // log2(sizeof(uint8_t))
    /*log2_weight_element_size=*/1,  // log2(sizeof(int16_t))
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.u8.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
432