// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm> // For std::generate, std::fill, std::min, std::max, std::min_element, std::max_element.
#include <array> // For std::array.
#include <cmath> // For lrint, nanf.
#include <cstddef> // For size_t.
#include <cstdint> // For uint32_t.
#include <limits> // For std::numeric_limits.
#include <memory> // For std::unique_ptr.
#include <random> // For std::random_device, std::mt19937, std::uniform_int_distribution, std::uniform_real_distribution.
#include <vector> // For std::vector.

#include <xnnpack.h>
#include <xnnpack/operator.h>
#include <xnnpack/requantization.h>
#include <xnnpack/subgraph.h>

#include <gtest/gtest.h>

template <class T, class BiasType = T> class DeconvolutionTestBase : public ::testing::Test {
protected:
  DeconvolutionTestBase()
  {
    random_device = std::unique_ptr<std::random_device>(new std::random_device());
    rng = std::mt19937((*random_device)());
    input_size_dist = std::uniform_int_distribution<uint32_t>(10, 15);
    kernel_size_dist = std::uniform_int_distribution<uint32_t>(1, 5);
    stride_dist = std::uniform_int_distribution<uint32_t>(1, 3);
    f32dist = std::uniform_real_distribution<float>(0.1f, 1.0f);
    scale_dist = std::uniform_real_distribution<float>(1.0f, 5.0f);
    i32dist = std::uniform_int_distribution<int32_t>(-10000, 10000);

    batch_size = input_size_dist(rng);
    input_height = input_size_dist(rng);
    input_width = input_size_dist(rng);
    kernel_height = kernel_size_dist(rng);
    kernel_width = kernel_size_dist(rng);
    upsampling_height = stride_dist(rng);
    upsampling_width = stride_dist(rng);
    dilation_height = stride_dist(rng);
    dilation_width = stride_dist(rng);
    groups = input_size_dist(rng);
    group_input_channels = input_size_dist(rng);
    group_output_channels = input_size_dist(rng);
    output_min = -std::numeric_limits<float>::infinity();
    output_max = std::numeric_limits<float>::infinity();
    adjustment_height = 0;
    adjustment_width = 0;
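    // A sketch of the size relation this helper is assumed to implement, per
    // spatial axis: with effective kernel extent (kernel - 1) * dilation + 1,
    //   output = stride * (input - 1) + adjustment + effective_kernel - padding.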
    output_height = xnn_compute_deconvolution_output_dimension(
      input_height, padding_top + padding_bottom, adjustment_height, kernel_height, dilation_height, upsampling_height);
    output_width = xnn_compute_deconvolution_output_dimension(
      input_width, padding_left + padding_right, adjustment_width, kernel_width, dilation_width, upsampling_width);

    input_dims = {{batch_size, input_height, input_width, groups * group_input_channels}};
    kernel_dims = {{groups * group_output_channels, kernel_height, kernel_width, group_input_channels}};
    bias_dims = {{groups * group_output_channels}};
    output_dims = {{batch_size, output_height, output_width, groups * group_output_channels}};

    input = std::vector<T>(
      XNN_EXTRA_BYTES / sizeof(T) + batch_size * input_height * input_width * groups * group_input_channels);
    kernel = std::vector<T>(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
    bias = std::vector<BiasType>(groups * group_output_channels);
    operator_output = std::vector<T>(batch_size * output_height * output_width * groups * group_output_channels);
    subgraph_output = std::vector<T>(batch_size * output_height * output_width * groups * group_output_channels);
  }

  std::unique_ptr<std::random_device> random_device;
  std::mt19937 rng;
  std::uniform_int_distribution<uint32_t> input_size_dist;
  std::uniform_int_distribution<uint32_t> kernel_size_dist;
  std::uniform_int_distribution<uint32_t> stride_dist;
  std::uniform_int_distribution<int32_t> i32dist;
  std::uniform_real_distribution<float> f32dist;
  std::uniform_real_distribution<float> scale_dist;

  const uint32_t padding_top = 0;
  const uint32_t padding_right = 0;
  const uint32_t padding_bottom = 0;
  const uint32_t padding_left = 0;
  uint32_t batch_size;
  uint32_t input_height;
  uint32_t input_width;
  uint32_t kernel_height;
  uint32_t kernel_width;
  uint32_t upsampling_height;
  uint32_t upsampling_width;
  uint32_t adjustment_height;
  uint32_t adjustment_width;
  uint32_t dilation_height;
  uint32_t dilation_width;
  uint32_t groups;
  uint32_t group_input_channels;
  uint32_t group_output_channels;
  float output_min;
  float output_max;
  uint32_t output_height;
  uint32_t output_width;

  std::array<size_t, 4> input_dims;
  std::array<size_t, 4> kernel_dims;
  std::array<size_t, 1> bias_dims;
  std::array<size_t, 4> output_dims;

  std::vector<T> input;
  std::vector<T> kernel;
  std::vector<BiasType> bias;
  std::vector<T> operator_output;
  std::vector<T> subgraph_output;
};

template <class T> class QuantizedDeconvolutionTestBase : public DeconvolutionTestBase<T, int32_t> {
protected:
  QuantizedDeconvolutionTestBase()
  {
    i8dist = std::uniform_int_distribution<int32_t>(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    // Weights are drawn from the symmetric range [-max(), max()].
    w8dist = std::uniform_int_distribution<int32_t>(-std::numeric_limits<T>::max(), std::numeric_limits<T>::max());
    u8dist = std::uniform_int_distribution<int32_t>(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
    accumulators = std::vector<int32_t>(
      this->batch_size * this->output_height * this->output_width * this->groups * this->group_output_channels);
  }

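  // Seed every output accumulator with the bias of its output channel; the
  // reference loops in the tests below then add only the weighted input
  // contributions on top.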
  void initialize_accumulators_from_bias()
  {
    for (size_t i = 0; i < this->batch_size; i++) {
      for (size_t oy = 0; oy < this->output_height; oy++) {
        for (size_t ox = 0; ox < this->output_width; ox++) {
          for (size_t g = 0; g < this->groups; g++) {
            for (size_t oc = 0; oc < this->group_output_channels; oc++) {
              accumulators
                [(((i * this->output_height + oy) * this->output_width + ox) * this->groups + g) *
                   this->group_output_channels +
                 oc] = this->bias[g * this->group_output_channels + oc];
            }
          }
        }
      }
    }
  }

  std::uniform_int_distribution<int32_t> i8dist;
  std::uniform_int_distribution<int32_t> u8dist;
  std::uniform_int_distribution<int32_t> w8dist;
  std::vector<int32_t> accumulators;
};

using DeconvolutionTestQS8 = QuantizedDeconvolutionTestBase<int8_t>;
using DeconvolutionTestQU8 = QuantizedDeconvolutionTestBase<uint8_t>;
using DeconvolutionTestF32 = DeconvolutionTestBase<float>;
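// Each fixture gets a *define* test, which inspects the node recorded in the
// subgraph, and a *matches_operator_api* test, which checks that the subgraph
// runtime and the direct operator API produce identical outputs.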

TEST_F(DeconvolutionTestQS8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_deconvolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qs8);
  ASSERT_EQ(node->params.deconvolution_2d.padding_top, padding_top);
  ASSERT_EQ(node->params.deconvolution_2d.padding_right, padding_right);
  ASSERT_EQ(node->params.deconvolution_2d.padding_bottom, padding_bottom);
  ASSERT_EQ(node->params.deconvolution_2d.padding_left, padding_left);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_height, upsampling_height);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_width, upsampling_width);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_height, adjustment_height);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_width, adjustment_width);
  ASSERT_EQ(node->params.deconvolution_2d.groups, groups);
  ASSERT_EQ(node->params.deconvolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.deconvolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DeconvolutionTestQU8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_deconvolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
  ASSERT_EQ(node->params.deconvolution_2d.padding_top, padding_top);
  ASSERT_EQ(node->params.deconvolution_2d.padding_right, padding_right);
  ASSERT_EQ(node->params.deconvolution_2d.padding_bottom, padding_bottom);
  ASSERT_EQ(node->params.deconvolution_2d.padding_left, padding_left);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_height, upsampling_height);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_width, upsampling_width);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_height, adjustment_height);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_width, adjustment_width);
  ASSERT_EQ(node->params.deconvolution_2d.groups, groups);
  ASSERT_EQ(node->params.deconvolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.deconvolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DeconvolutionTestF32, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_deconvolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_fp32);
  ASSERT_EQ(node->params.deconvolution_2d.padding_top, padding_top);
  ASSERT_EQ(node->params.deconvolution_2d.padding_right, padding_right);
  ASSERT_EQ(node->params.deconvolution_2d.padding_bottom, padding_bottom);
  ASSERT_EQ(node->params.deconvolution_2d.padding_left, padding_left);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_height, upsampling_height);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_width, upsampling_width);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_height, adjustment_height);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_width, adjustment_width);
  ASSERT_EQ(node->params.deconvolution_2d.groups, groups);
  ASSERT_EQ(node->params.deconvolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.deconvolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DeconvolutionTestQS8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(kernel.begin(), kernel.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));
  const int8_t input_zero_point = 1;
  const float input_scale = scale_dist(rng);
  const float kernel_scale = scale_dist(rng);

  // Compute reference results, without renormalization.
  initialize_accumulators_from_bias();
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t ky = 0; ky < kernel_height; ky++) {
          const size_t y = oy + padding_top - ky * dilation_height;
          const size_t iy = y / upsampling_height;
          // Only input rows that land exactly on an upsampling stride contribute.
          if (iy * upsampling_height == y && iy < input_height) {
            for (size_t kx = 0; kx < kernel_width; kx++) {
              const size_t x = ox + padding_left - kx * dilation_width;
              const size_t ix = x / upsampling_width;
              if (ix * upsampling_width == x && ix < input_width) {
                for (size_t g = 0; g < groups; g++) {
                  for (size_t oc = 0; oc < group_output_channels; oc++) {
                    for (size_t ic = 0; ic < group_input_channels; ic++) {
                      accumulators
                        [(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
                        (int32_t(input
                                   [(((i * input_height + iy) * input_width + ix) * groups + g) *
                                      group_input_channels +
                                    ic]) -
                         int32_t(input_zero_point)) *
                        int32_t(kernel
                                  [(((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) *
                                     group_input_channels +
                                   ic]);
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  // Compute renormalization parameters.
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const int8_t output_zero_point = int8_t(std::max(
    std::min(
      lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<int8_t>::max())),
    long(std::numeric_limits<int8_t>::min())));
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);
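  // Note: output_min/output_max are +/-infinity in this fixture, so the
  // quantized bounds land on the int8 limits and impose no real clamping.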

  // Call operator API.
  const xnn_status status = xnn_create_deconvolution2d_nhwc_qs8(
    padding_top, padding_right, padding_bottom, padding_left, kernel_height, kernel_width, upsampling_height,
    upsampling_width, dilation_height, dilation_width, groups, group_input_channels, group_output_channels,
    groups * group_input_channels, groups * group_output_channels, input_zero_point, input_scale, kernel_scale,
    kernel.data(), bias.data(), output_zero_point, output_scale, quantized_output_min, quantized_output_max,
    /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_deconvolution2d_nhwc_qs8(
                          op, batch_size, input_height, input_width, adjustment_height, adjustment_width, input.data(),
                          operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, kernel_scale, kernel_dims.size(), kernel_dims.data(),
                          kernel.data(), /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
                          bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
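  // Bind the external tensors: the same input buffer the operator path read,
  // and a separate output buffer so the two results can be compared.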
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}

TEST_F(DeconvolutionTestQU8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
  std::generate(kernel.begin(), kernel.end(), [&]() { return u8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0xA5));
  const uint8_t input_zero_point = u8dist(rng);
  const uint8_t kernel_zero_point = 0;
  const float input_scale = scale_dist(rng);
  const float kernel_scale = scale_dist(rng);

  // Compute reference results, without renormalization.
  initialize_accumulators_from_bias();
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oy = 0; oy < output_height; oy++) {
      for (size_t ox = 0; ox < output_width; ox++) {
        for (size_t ky = 0; ky < kernel_height; ky++) {
          const size_t y = oy + padding_top - ky * dilation_height;
          const size_t iy = y / upsampling_height;
          // Only input rows that land exactly on an upsampling stride contribute.
          if (iy * upsampling_height == y && iy < input_height) {
            for (size_t kx = 0; kx < kernel_width; kx++) {
              const size_t x = ox + padding_left - kx * dilation_width;
              const size_t ix = x / upsampling_width;
              if (ix * upsampling_width == x && ix < input_width) {
                for (size_t g = 0; g < groups; g++) {
                  for (size_t oc = 0; oc < group_output_channels; oc++) {
                    for (size_t ic = 0; ic < group_input_channels; ic++) {
                      accumulators
                        [(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
                        (int32_t(input
                                   [(((i * input_height + iy) * input_width + ix) * groups + g) *
                                      group_input_channels +
                                    ic]) -
                         int32_t(input_zero_point)) *
                        (int32_t(kernel
                                   [(((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) *
                                      group_input_channels +
                                    ic]) -
                         int32_t(kernel_zero_point));
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  // Compute renormalization parameters.
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const uint8_t output_zero_point = uint8_t(std::max(
    std::min(
      lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<uint8_t>::max())),
    long(std::numeric_limits<uint8_t>::min())));
  const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point);
  const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point);
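  // As in the QS8 test, the +/-infinity activation bounds quantize to the
  // full uint8 range, so the output is effectively unclamped.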

  // Call operator API.
  const xnn_status status = xnn_create_deconvolution2d_nhwc_qu8(
    padding_top, padding_right, padding_bottom, padding_left, kernel_height, kernel_width, upsampling_height,
    upsampling_width, dilation_height, dilation_width, groups, group_input_channels, group_output_channels,
    groups * group_input_channels, groups * group_output_channels, input_zero_point, input_scale, kernel_zero_point,
    kernel_scale, kernel.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
    quantized_output_max, /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_deconvolution2d_nhwc_qu8(
                          op, batch_size, input_height, input_width, adjustment_height, adjustment_width, input.data(),
                          operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, kernel_scale, kernel_dims.size(), kernel_dims.data(),
                          kernel.data(), /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
                          bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}

TEST_F(DeconvolutionTestF32, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
  std::generate(kernel.begin(), kernel.end(), [&]() { return f32dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return f32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), nanf(""));
  std::fill(subgraph_output.begin(), subgraph_output.end(), nanf(""));

  // Call operator API.
  const xnn_status status = xnn_create_deconvolution2d_nhwc_f32(
    padding_top, padding_right, padding_bottom, padding_left, kernel_height, kernel_width, upsampling_height,
    upsampling_width, dilation_height, dilation_width, groups, group_input_channels, group_output_channels,
    groups * group_input_channels, groups * group_output_channels, kernel.data(), bias.data(), output_min, output_max,
    /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_deconvolution2d_nhwc_f32(
                          op, batch_size, input_height, input_width, adjustment_height, adjustment_width, input.data(),
                          operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
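  // The runtime is expected to execute the same underlying operator, so even
  // floating-point results should match bit-for-bit.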
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}