1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <stddef.h>
7 #include <stdint.h>
8
9 #include <xnnpack.h>
10 #include <xnnpack/allocator.h>
11 #include <xnnpack/operator.h>
12 #include <xnnpack/log.h>
13 #include <xnnpack/params.h>
14
15
// Shared constructor behind the x8/x16/x32 Depth To Space (NHWC) create functions.
//
// Validates the arguments, allocates a zero-initialized operator descriptor and
// records the output channel count, the input/output pixel strides, the block
// size, the operator type and the flags in it.
//
// Parameters:
//   output_channels       - channels per output pixel; must be non-zero.
//   input_channel_stride  - input pixel stride in elements; must be at least
//                           output_channels * block_size * block_size.
//   output_channel_stride - output pixel stride in elements; must be at least
//                           output_channels.
//   block_size            - spatial block size; must be greater than 1.
//   flags                 - stored in the operator as-is.
//   operator_type         - stored in the operator and used in log messages.
//   depth_to_space_op_out - receives the new operator on success; it is not
//                           written on failure.
//
// Returns:
//   xnn_status_success           - the operator was created.
//   xnn_status_uninitialized     - XNN_INIT_FLAG_XNNPACK is not set in
//                                  xnn_params.init_flags.
//   xnn_status_invalid_parameter - one of the argument checks above failed, or
//                                  the input channel count does not fit in size_t.
//   xnn_status_out_of_memory     - the descriptor allocation failed.
static enum xnn_status create_depth_to_space_nhwc(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* depth_to_space_op_out)
{
  xnn_operator_t depth_to_space_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  // Every failure from here up to the allocation is an argument error.
  status = xnn_status_invalid_parameter;

  if (output_channels == 0) {
    xnn_log_error("failed to create %s operator with %zu output channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), output_channels);
    goto error;
  }

  if (output_channel_stride < output_channels) {
    xnn_log_error(
      "failed to create %s operator with output channel stride of %zu: "
      "stride must be at least as large as the number of output channels (%zu)",
      xnn_operator_type_to_string(operator_type),
      output_channel_stride, output_channels);
    goto error;
  }

  if (block_size <= 1) {
    xnn_log_error("failed to create %s operator with %" PRIu32 " block size: block size must be greater than 1",
      xnn_operator_type_to_string(operator_type),
      block_size);
    goto error;
  }

  // block_size >= 2 here, so the divisions are safe. Without this guard the
  // product below could wrap around and let an undersized input stride through.
  if (output_channels > SIZE_MAX / block_size / block_size) {
    xnn_log_error(
      "failed to create %s operator with %zu output channels and %" PRIu32 " block size: "
      "number of input channels must be representable as size_t",
      xnn_operator_type_to_string(operator_type),
      output_channels, block_size);
    goto error;
  }

  const size_t input_channels = output_channels * block_size * block_size;
  if (input_channel_stride < input_channels) {
    // The message spells the input channel count out as block x block x output
    // channels, so the last factor is output_channels, not the product itself.
    xnn_log_error(
      "failed to create %s operator with input channel stride of %zu: "
      "stride must be at least as large as the number of input channels (%" PRIu32 "x%" PRIu32 "x%zu)",
      xnn_operator_type_to_string(operator_type),
      input_channel_stride, block_size, block_size, output_channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  depth_to_space_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (depth_to_space_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  depth_to_space_op->channels = output_channels;
  depth_to_space_op->input_pixel_stride = input_channel_stride;
  depth_to_space_op->output_pixel_stride = output_channel_stride;
  depth_to_space_op->block_size = block_size;

  depth_to_space_op->type = operator_type;
  depth_to_space_op->flags = flags;

  // The operator stays in the invalid run state until a setup call succeeds.
  depth_to_space_op->state = xnn_run_state_invalid;

  *depth_to_space_op_out = depth_to_space_op;
  return xnn_status_success;

error:
  // depth_to_space_op is still NULL on every path that reaches this label.
  xnn_delete_operator(depth_to_space_op);
  return status;
}
95
// Public entry point for creating the x8 variant of the Depth To Space (NHWC)
// operator. All arguments are forwarded unchanged to create_depth_to_space_nhwc
// together with the x8 operator type; its status is returned as-is.
enum xnn_status xnn_create_depth_to_space_nhwc_x8(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_depth_to_space_nhwc_x8;

  return create_depth_to_space_nhwc(
    output_channels, input_channel_stride, output_channel_stride,
    block_size, flags,
    operator_type,
    depth_to_space_op_out);
}
113
// Public entry point for creating the x16 variant of the Depth To Space (NHWC)
// operator. All arguments are forwarded unchanged to create_depth_to_space_nhwc
// together with the x16 operator type; its status is returned as-is.
enum xnn_status xnn_create_depth_to_space_nhwc_x16(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_depth_to_space_nhwc_x16;

  return create_depth_to_space_nhwc(
    output_channels, input_channel_stride, output_channel_stride,
    block_size, flags,
    operator_type,
    depth_to_space_op_out);
}
131
// Public entry point for creating the x32 variant of the Depth To Space (NHWC)
// operator. All arguments are forwarded unchanged to create_depth_to_space_nhwc
// together with the x32 operator type; its status is returned as-is.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_depth_to_space_nhwc_x32;

  return create_depth_to_space_nhwc(
    output_channels, input_channel_stride, output_channel_stride,
    block_size, flags,
    operator_type,
    depth_to_space_op_out);
}
149
// Shared setup routine behind the x8/x16/x32 Depth To Space (NHWC) setup functions.
//
// Checks that the operator has the expected type and that XNNPACK is
// initialized, then fills in the operator's depthtospace2d_hwc context and its
// parallelization parameters for the given batch size and input dimensions.
//
// log2_element_size is the base-2 logarithm of the element size in bytes; it is
// used to turn the element-based channel count and pixel strides stored in the
// operator into byte quantities.
//
// Returns:
//   xnn_status_invalid_parameter - operator type mismatch, or a zero input
//                                  width/height.
//   xnn_status_uninitialized     - XNN_INIT_FLAG_XNNPACK is not set in
//                                  xnn_params.init_flags.
//   xnn_status_success           - otherwise; the operator state is set to
//                                  xnn_run_state_skip for an empty batch and to
//                                  xnn_run_state_ready in all other cases.
static enum xnn_status setup_depth_to_space_nhwc(
    xnn_operator_t depth_to_space_op,
    enum xnn_operator_type expected_operator_type,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    uint32_t log2_element_size)
{
  if (depth_to_space_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(depth_to_space_op->type));
    return xnn_status_invalid_parameter;
  }

  // Any earlier setup is discarded; the state only becomes ready (or skip)
  // again once this call has succeeded.
  depth_to_space_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(expected_operator_type));
    return xnn_status_uninitialized;
  }

  if (input_height == 0 || input_width == 0) {
    xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(expected_operator_type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // An empty batch is valid, but there is nothing to compute.
  if (batch_size == 0) {
    depth_to_space_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const uint32_t block = depth_to_space_op->block_size;
  const size_t channels = depth_to_space_op->channels;

  // Pixel strides are stored in elements; the context takes bytes.
  const size_t input_stride_bytes = depth_to_space_op->input_pixel_stride << log2_element_size;
  const size_t output_stride_bytes = depth_to_space_op->output_pixel_stride << log2_element_size;

  // Each input pixel expands into `block` output pixels along the width.
  const size_t output_width = input_width * block;

  depth_to_space_op->context.depthtospace2d_hwc = (struct depthtospace2d_hwc_context) {
    .input = input,
    .output = output,
    .elements = channels << log2_element_size,
    .block_size = (size_t) block,
    .input_width = input_width,
    .input_width_stride = input_stride_bytes,
    .input_height_stride = input_width * input_stride_bytes,
    .output_width_stride = output_stride_bytes,
    .output_height_stride = output_width * output_stride_bytes,
    .ukernel = xnn_params.xx.copy,
  };

  // The first three parallelization dimensions are the same for both variants.
  depth_to_space_op->compute.range[0] = batch_size * input_height;
  depth_to_space_op->compute.range[1] = input_width;
  depth_to_space_op->compute.range[2] = block;

  if (depth_to_space_op->output_pixel_stride != channels) {
    // Output pixels are padded: use the strided task with a fourth dimension
    // of size `block`.
    depth_to_space_op->compute.type = xnn_parallelization_type_4d;
    depth_to_space_op->compute.task_4d = (pthreadpool_task_4d_t) xnn_compute_depthtospace2d_hwc_strided;
    depth_to_space_op->compute.range[3] = block;
  } else {
    // Transpose (N, Hi, Wi, Hb, Wb, Cout) -> (N, Hi, Hb, Wi, Wb, Cout). With a
    // dense output, Wb and Cout are contiguous in memory, so Wb * Cout
    // elements are copied at once.
    depth_to_space_op->context.depthtospace2d_hwc.elements *= block;

    depth_to_space_op->compute.type = xnn_parallelization_type_3d;
    depth_to_space_op->compute.task_3d = (pthreadpool_task_3d_t) xnn_compute_depthtospace2d_hwc_contiguous;
  }

  depth_to_space_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
225
// Public entry point for setting up an x8 Depth To Space (NHWC) operator.
// Delegates to setup_depth_to_space_nhwc with the x8 operator type and a log2
// element size of 0 (1-byte elements) and returns its status.
// The threadpool argument is not referenced by this function.
enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 0;  /* log2(sizeof(uint8_t)) */

  return setup_depth_to_space_nhwc(
    depth_to_space_op,
    xnn_operator_type_depth_to_space_nhwc_x8,
    batch_size,
    input_height,
    input_width,
    input,
    output,
    log2_element_size);
}
242
// Public entry point for setting up an x16 Depth To Space (NHWC) operator.
// Delegates to setup_depth_to_space_nhwc with the x16 operator type and a log2
// element size of 1 (2-byte elements) and returns its status.
// The threadpool argument is not referenced by this function.
enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  /* log2(sizeof(uint16_t)) */

  return setup_depth_to_space_nhwc(
    depth_to_space_op,
    xnn_operator_type_depth_to_space_nhwc_x16,
    batch_size,
    input_height,
    input_width,
    input,
    output,
    log2_element_size);
}
259
// Public entry point for setting up an x32 Depth To Space (NHWC) operator.
// Delegates to setup_depth_to_space_nhwc with the x32 operator type and a log2
// element size of 2 (4-byte elements) and returns its status.
// The threadpool argument is not referenced by this function.
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  /* log2(sizeof(uint32_t)) */

  return setup_depth_to_space_nhwc(
    depth_to_space_op,
    xnn_operator_type_depth_to_space_nhwc_x32,
    batch_size,
    input_height,
    input_width,
    input,
    output,
    log2_element_size);
}
276