xref: /aosp_15_r20/external/XNNPACK/src/operators/resize-bilinear-nhwc.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22 
23 
// Shared creation routine for all resize-bilinear (NHWC) operators.
// Validates the configuration, allocates a zero-initialized operator
// descriptor, and stores it in *resize_op_out. On any failure the partial
// allocation (if any) is released and an error status is returned.
static enum xnn_status create_resize_bilinear2d_nhwc(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* resize_op_out)
{
  enum xnn_status status = xnn_status_uninitialized;
  xnn_operator_t op = NULL;

  // XNNPACK must be initialized before any operator can be created.
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto cleanup;
  }

  // The microkernels for the requested datatype must be available.
  status = xnn_status_unsupported_hardware;
  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto cleanup;
  }

  // Parameter validation: a non-zero channel count, and pixel strides wide
  // enough to hold one full pixel worth of channels.
  status = xnn_status_invalid_parameter;
  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto cleanup;
  }
  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto cleanup;
  }
  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto cleanup;
  }

  // Allocate the operator descriptor itself (zeroed, SIMD-aligned).
  status = xnn_status_out_of_memory;
  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto cleanup;
  }

  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;
  op->type = operator_type;
  op->flags = flags;
  // The operator is not runnable until a setup call succeeds.
  op->state = xnn_run_state_invalid;

  *resize_op_out = op;
  return xnn_status_success;

cleanup:
  xnn_delete_operator(op);
  return status;
}
101 
// Creates a half-precision (F16) resize-bilinear NHWC operator.
// Thin wrapper that tags the shared creation routine with the F16
// datatype-init flag and operator type.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f16(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_F16, xnn_operator_type_resize_bilinear_nhwc_f16,
    resize_op_out);
}
118 
// Creates a single-precision (F32) resize-bilinear NHWC operator.
// Thin wrapper that tags the shared creation routine with the F32
// datatype-init flag and operator type.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_F32, xnn_operator_type_resize_bilinear_nhwc_f32,
    resize_op_out);
}
135 
// Creates a signed 8-bit (S8) resize-bilinear NHWC operator.
// Thin wrapper that tags the shared creation routine with the S8
// datatype-init flag and operator type.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_S8, xnn_operator_type_resize_bilinear_nhwc_s8,
    resize_op_out);
}
152 
// Creates an unsigned 8-bit (U8) resize-bilinear NHWC operator.
// Thin wrapper that tags the shared creation routine with the U8
// datatype-init flag and operator type.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    XNN_INIT_FLAG_U8, xnn_operator_type_resize_bilinear_nhwc_u8,
    resize_op_out);
}
169 
// Shared setup routine for all resize-bilinear (NHWC) operators.
//
// Validates the requested shapes, (re)allocates and (re)initializes the
// indirection buffer and packed interpolation weights when the geometry
// changes since the previous setup call, fills in the compute context, and
// arranges 2D (batch x output pixels) parallelization over the thread pool.
//
// resize_op                 - operator previously created by
//                             create_resize_bilinear2d_nhwc.
// expected_operator_type    - guards against setup being called on the wrong
//                             operator kind.
// log2_element_size         - log2 of the data element size in bytes.
// log2_weight_element_size  - log2 of the packed weight element size in bytes.
// indirection_init          - datatype-specific routine that fills the
//                             indirection buffer and packed weights.
// ibilinear                 - microkernel parameters (ukernel + pixel tile).
// num_threads               - thread count used only for tile-size selection.
static enum xnn_status setup_resize_bilinear2d_nhwc(
    xnn_operator_t resize_op,
    enum xnn_operator_type expected_operator_type,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const void* input,
    void* output,
    uint32_t log2_element_size,
    uint32_t log2_weight_element_size,
    xnn_indirection_init_resize_bilinear2d_hwc_fn indirection_init,
    const struct ibilinear_parameters ibilinear[restrict XNN_MIN_ELEMENTS(1)],
    size_t num_threads)
{
  if (resize_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_invalid_parameter;
  }
  // Mark the operator invalid up front; it becomes ready only on success.
  resize_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // Dimensions must fit in 24 bits. NOTE(review): presumably required by the
  // fixed-point coordinate computation in the indirection-init routines --
  // confirm against xnn_indirection_init_resize_bilinear2d_hwc_*.
  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  // An empty batch is a valid no-op: record it so the run step is skipped.
  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Reallocate the per-output-pixel buffers only when the number of output
  // pixels changed; both buffer sizes depend solely on that count.
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    // 4 input pointers (interpolation taps) per output pixel.
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    // 2 interpolation weights per output pixel.
    const size_t packed_weights_size = (output_height * output_width * 2) << log2_weight_element_size;

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator indirection buffer",
        indirection_buffer_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // Note: packed weights must be SIMD-aligned, so we can't use xnn_reallocate_memory
    xnn_release_simd_memory(resize_op->packed_weights.pointer);
    resize_op->packed_weights.pointer = xnn_allocate_simd_memory(packed_weights_size);
    if (resize_op->packed_weights.pointer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator packed weights",
        packed_weights_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
  }

  const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride << log2_element_size;
  // Recompute indirection pointers and weights only when the geometry changed;
  // a new input pointer alone is handled via input_offset below.
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    const uint32_t flags = resize_op->flags;
    indirection_init(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights.pointer,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    // Remember the geometry (and the input base the indirection buffer was
    // built against) so future setups can reuse the buffers.
    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride << log2_element_size;
  // Resize bilinear packed weights can change when the operator is resized, we will not use weights cache.
  assert(resize_op->weights_cache == NULL);
  resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
    .scaled_channels = resize_op->channels << log2_element_size,
    .indirect_input = resize_op->indirection_buffer,
    // Indirection entries point into last_input; shift them to the current
    // input buffer with a byte offset instead of rebuilding the table.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
    .packed_weights = resize_op->packed_weights.pointer,
    .output = output,
    .output_pixel_stride = output_pixel_stride_in_bytes,
    .output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
    .log2_wsize = 1 + log2_weight_element_size /* log2(2 * sizeof(weight)) */,
    .ukernel = ibilinear->ukernel,
  };

  // Pick the parallelization tile: in test mode one microkernel pixel tile;
  // otherwise aim at ~5 tiles per thread, rounded to a multiple of the
  // microkernel's pixel tile.
  const size_t output_size = output_height * output_width;
  #if XNN_TEST_MODE
    const size_t output_size_tile = ibilinear->pixel_tile;
  #else
    size_t output_size_tile = output_size;
    if (num_threads > 1) {
      const size_t target_tiles_per_thread = 5;
      const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
      if (max_output_size_tile < output_size_tile) {
        const uint32_t output_size_subtile = ibilinear->pixel_tile;
        output_size_tile =
          min(output_size_tile,
            divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
      }
    }
  #endif
  // Parallelize over (batch, output pixel) with 1D tiling of the pixel axis.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = output_size;
  resize_op->compute.tile[0] = output_size_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
320 
// Sets up an F16 resize-bilinear NHWC operator for the given shapes and
// buffers. Half-precision data and packed weights are both 2 bytes wide.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f16(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_data_element_size = 1;    // log2(sizeof(uint16_t))
  const uint32_t log2_weight_element_size = 1;  // log2(sizeof(uint16_t))
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_f16,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    log2_data_element_size, log2_weight_element_size,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_f16,
    &xnn_params.f16.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
348 
// Sets up an F32 resize-bilinear NHWC operator for the given shapes and
// buffers. Single-precision data and packed weights are both 4 bytes wide.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_data_element_size = 2;    // log2(sizeof(float))
  const uint32_t log2_weight_element_size = 2;  // log2(sizeof(float))
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_f32,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    log2_data_element_size, log2_weight_element_size,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_f32,
    &xnn_params.f32.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
376 
// Sets up an S8 resize-bilinear NHWC operator for the given shapes and
// buffers. Data elements are 1-byte int8_t; packed weights are 2-byte
// fixed-point values produced by the Q11 indirection initializer.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_data_element_size = 0;    // log2(sizeof(int8_t))
  const uint32_t log2_weight_element_size = 1;  // log2(sizeof(int16_t))
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_s8,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    log2_data_element_size, log2_weight_element_size,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.s8.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
404 
// Sets up a U8 resize-bilinear NHWC operator for the given shapes and
// buffers. Data elements are 1-byte uint8_t; packed weights are 2-byte
// fixed-point values produced by the Q11 indirection initializer.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_data_element_size = 0;    // log2(sizeof(uint8_t))
  const uint32_t log2_weight_element_size = 1;  // log2(sizeof(int16_t))
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_u8,
    batch_size, input_height, input_width, output_height, output_width,
    input, output,
    log2_data_element_size, log2_weight_element_size,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.u8.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
432