xref: /aosp_15_r20/external/XNNPACK/src/operators/depth-to-space-nhwc.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <stddef.h>
7 #include <stdint.h>
8 
9 #include <xnnpack.h>
10 #include <xnnpack/allocator.h>
11 #include <xnnpack/operator.h>
12 #include <xnnpack/log.h>
13 #include <xnnpack/params.h>
14 
15 
// Validates the configuration and allocates a Depth To Space (NHWC) operator.
//
// Shared implementation behind the xnn_create_depth_to_space_nhwc_x* entry
// points; operator_type is stored in the operator and used in log messages.
//
// Returns:
//   xnn_status_uninitialized     - XNN_INIT_FLAG_XNNPACK is not set in
//                                  xnn_params.init_flags.
//   xnn_status_invalid_parameter - output_channels is zero, block_size is not
//                                  greater than 1, or a channel stride is
//                                  smaller than the matching channel count.
//   xnn_status_out_of_memory     - the operator descriptor allocation failed.
//   xnn_status_success           - *depth_to_space_op_out receives the operator.
//
// On any failure *depth_to_space_op_out is left unmodified.
static enum xnn_status create_depth_to_space_nhwc(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* depth_to_space_op_out)
{
  xnn_operator_t depth_to_space_op = NULL;
  // `status` always holds the code to return if the next check fails.
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (output_channels == 0) {
    xnn_log_error("failed to create %s operator with %zu output channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), output_channels);
    goto error;
  }

  if (output_channel_stride < output_channels) {
    xnn_log_error(
      "failed to create %s operator with output channel stride of %zu: "
      "stride must be at least as large as the number of output channels (%zu)",
      xnn_operator_type_to_string(operator_type),
      output_channel_stride, output_channels);
    goto error;
  }

  if (block_size <= 1) {
    xnn_log_error("failed to create %s operator with %" PRIu32 " block size: block size must be greater than 1",
      xnn_operator_type_to_string(operator_type),
      block_size);
    goto error;
  }

  // Each input pixel carries block_size * block_size groups of output_channels.
  // NOTE(review): this product is not checked for size_t overflow.
  const size_t input_channels = output_channels * block_size * block_size;
  if (input_channel_stride < input_channels) {
    // The message spells the input channel count as a product of its factors,
    // so the last factor is output_channels (not the already-multiplied total).
    xnn_log_error(
      "failed to create %s operator with input channel stride of %zu: "
      "stride must be at least as large as the number of input channels (%" PRIu32 "x%" PRIu32 "x%zu)",
      xnn_operator_type_to_string(operator_type),
      input_channel_stride, block_size, block_size, output_channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  depth_to_space_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (depth_to_space_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  depth_to_space_op->channels = output_channels;
  depth_to_space_op->input_pixel_stride = input_channel_stride;
  depth_to_space_op->output_pixel_stride = output_channel_stride;
  depth_to_space_op->block_size = block_size;

  depth_to_space_op->type = operator_type;
  depth_to_space_op->flags = flags;

  // The operator cannot be run until a setup call marks it ready.
  depth_to_space_op->state = xnn_run_state_invalid;

  *depth_to_space_op_out = depth_to_space_op;
  return xnn_status_success;

error:
  // depth_to_space_op is still NULL on every path that reaches this label.
  xnn_delete_operator(depth_to_space_op);
  return status;
}
95 
// Creates a Depth To Space (NHWC) operator tagged with the x8 operator type.
// All arguments are forwarded unchanged to create_depth_to_space_nhwc, whose
// status is returned as-is.
enum xnn_status xnn_create_depth_to_space_nhwc_x8(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_depth_to_space_nhwc_x8;
  return create_depth_to_space_nhwc(
    output_channels, input_channel_stride, output_channel_stride,
    block_size, flags,
    operator_type,
    depth_to_space_op_out);
}
113 
// Creates a Depth To Space (NHWC) operator tagged with the x16 operator type.
// All arguments are forwarded unchanged to create_depth_to_space_nhwc, whose
// status is returned as-is.
enum xnn_status xnn_create_depth_to_space_nhwc_x16(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_depth_to_space_nhwc_x16;
  return create_depth_to_space_nhwc(
    output_channels, input_channel_stride, output_channel_stride,
    block_size, flags,
    operator_type,
    depth_to_space_op_out);
}
131 
// Creates a Depth To Space (NHWC) operator tagged with the x32 operator type.
// All arguments are forwarded unchanged to create_depth_to_space_nhwc, whose
// status is returned as-is.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_depth_to_space_nhwc_x32;
  return create_depth_to_space_nhwc(
    output_channels, input_channel_stride, output_channel_stride,
    block_size, flags,
    operator_type,
    depth_to_space_op_out);
}
149 
// Binds a Depth To Space (NHWC) operator to an input shape and to its
// input/output buffers, and fills in the compute plan.
//
// Shared implementation behind the xnn_setup_depth_to_space_nhwc_x* entry
// points. log2_element_size is the base-2 logarithm of the element size in
// bytes; it is used to convert element counts and strides to bytes.
//
// Returns:
//   xnn_status_invalid_parameter - the operator type differs from
//                                  expected_operator_type, or input_width or
//                                  input_height is zero.
//   xnn_status_uninitialized     - XNN_INIT_FLAG_XNNPACK is not set in
//                                  xnn_params.init_flags.
//   xnn_status_success           - the operator is ready, or marked to be
//                                  skipped when batch_size is zero.
static enum xnn_status setup_depth_to_space_nhwc(
    xnn_operator_t depth_to_space_op,
    enum xnn_operator_type expected_operator_type,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    uint32_t log2_element_size)
{
  const enum xnn_operator_type actual_operator_type = depth_to_space_op->type;
  if (actual_operator_type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(actual_operator_type));
    return xnn_status_invalid_parameter;
  }

  // Any earlier setup is discarded; the state stays invalid if a check below fails.
  depth_to_space_op->state = xnn_run_state_invalid;

  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(expected_operator_type));
    return xnn_status_uninitialized;
  }

  if (input_height == 0 || input_width == 0) {
    xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(expected_operator_type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // An empty batch is valid: there is nothing to compute.
  if (batch_size == 0) {
    depth_to_space_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const uint32_t block = depth_to_space_op->block_size;
  const size_t channels = depth_to_space_op->channels;

  // Pixel strides converted from elements to bytes.
  const size_t input_pixel_bytes = depth_to_space_op->input_pixel_stride << log2_element_size;
  const size_t output_pixel_bytes = depth_to_space_op->output_pixel_stride << log2_element_size;

  // Each input column expands into `block` output columns.
  const size_t output_width = input_width * block;

  struct depthtospace2d_hwc_context* context = &depth_to_space_op->context.depthtospace2d_hwc;
  *context = (struct depthtospace2d_hwc_context) {
    .elements = channels << log2_element_size,
    .input_width = input_width,
    .block_size = (size_t) block,
    .input = input,
    .output = output,
    .input_height_stride = input_width * input_pixel_bytes,
    .input_width_stride = input_pixel_bytes,
    .output_height_stride = output_width * output_pixel_bytes,
    .output_width_stride = output_pixel_bytes,
    .ukernel = xnn_params.xx.copy,
  };

  // The first three parallelization dimensions are shared by both variants.
  depth_to_space_op->compute.range[0] = batch_size * input_height;
  depth_to_space_op->compute.range[1] = input_width;
  depth_to_space_op->compute.range[2] = block;

  if (depth_to_space_op->output_pixel_stride != channels) {
    // Strided output pixels: copy one pixel per task, adding a fourth
    // dimension of size `block`.
    depth_to_space_op->compute.type = xnn_parallelization_type_4d;
    depth_to_space_op->compute.task_4d = (pthreadpool_task_4d_t) xnn_compute_depthtospace2d_hwc_strided;
    depth_to_space_op->compute.range[3] = block;
  } else {
    // Transpose (N, Hi, Wi, Hb, Wb, Cout) -> (N, Hi, Hb, Wi, Wb, Cout) with Wb, Cout contiguous in memory.
    // Optimization: a single copy moves Wb * Cout elements.
    context->elements *= block;

    depth_to_space_op->compute.type = xnn_parallelization_type_3d;
    depth_to_space_op->compute.task_3d = (pthreadpool_task_3d_t) xnn_compute_depthtospace2d_hwc_contiguous;
  }

  depth_to_space_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
225 
// Sets up an x8 Depth To Space (NHWC) operator for the given shape and buffers.
// Delegates to setup_depth_to_space_nhwc; the threadpool argument is not used
// by this function.
enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  // log2(sizeof(uint8_t))
  const uint32_t log2_element_size = 0;
  return setup_depth_to_space_nhwc(
    depth_to_space_op, xnn_operator_type_depth_to_space_nhwc_x8,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size);
}
242 
// Sets up an x16 Depth To Space (NHWC) operator for the given shape and buffers.
// Delegates to setup_depth_to_space_nhwc; the threadpool argument is not used
// by this function.
enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  // log2(sizeof(uint16_t))
  const uint32_t log2_element_size = 1;
  return setup_depth_to_space_nhwc(
    depth_to_space_op, xnn_operator_type_depth_to_space_nhwc_x16,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size);
}
259 
// Sets up an x32 Depth To Space (NHWC) operator for the given shape and buffers.
// Delegates to setup_depth_to_space_nhwc; the threadpool argument is not used
// by this function.
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  // log2(sizeof(uint32_t))
  const uint32_t log2_element_size = 2;
  return setup_depth_to_space_nhwc(
    depth_to_space_op, xnn_operator_type_depth_to_space_nhwc_x32,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size);
}
276