xref: /aosp_15_r20/external/XNNPACK/test/deconvolution-2d.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>  // For std::generate, std::min.
7 #include <array>      // For std::array.
8 #include <cmath>      // For std::lrintf.
9 #include <cstddef>    // For size_t.
10 #include <cstdint>    // For uint32_t.
11 #include <limits>     // For std::numeric_limits.
12 #include <memory>     // For std::unique_ptr.
13 #include <random>     // For std::random_device, std::mt19937, std::uniform_real_distribution.
14 #include <vector>     // For std::vector.
15 
16 #include <xnnpack.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/requantization.h>
19 #include <xnnpack/subgraph.h>
20 
21 #include <gtest/gtest.h>
22 
// Test fixture shared by every deconvolution test below: draws a random but
// valid deconvolution configuration (shapes, strides, dilations, groups) and
// allocates input/kernel/bias/output buffers to match.
// T is the element type; BiasType defaults to T (int32_t for quantized tests).
template <class T, class BiasType = T> class DeconvolutionTestBase : public ::testing::Test {
protected:
  DeconvolutionTestBase()
  {
    // Seed the Mersenne Twister from a real entropy source; every random
    // parameter below is drawn from this single engine in a fixed order.
    random_device = std::unique_ptr<std::random_device>(new std::random_device());
    rng = std::mt19937((*random_device)());
    input_size_dist = std::uniform_int_distribution<uint32_t>(10, 15);
    kernel_size_dist = std::uniform_int_distribution<uint32_t>(1, 5);
    stride_dist = std::uniform_int_distribution<uint32_t>(1, 3);
    f32dist = std::uniform_real_distribution<float>(0.1f, 1.0f);
    scale_dist = std::uniform_real_distribution<float>(1.0f, 5.0f);
    i32dist = std::uniform_int_distribution<int32_t>(-10000, 10000);

    batch_size = input_size_dist(rng);
    input_height = input_size_dist(rng);
    input_width = input_size_dist(rng);
    kernel_height = kernel_size_dist(rng);
    kernel_width = kernel_size_dist(rng);
    upsampling_height = stride_dist(rng);
    upsampling_width = stride_dist(rng);
    dilation_height = stride_dist(rng);
    dilation_width = stride_dist(rng);
    groups = input_size_dist(rng);
    group_input_channels = input_size_dist(rng);
    group_output_channels = input_size_dist(rng);
    // Infinite activation bounds: no clamping is requested in these tests.
    output_min = -std::numeric_limits<float>::infinity();
    output_max = std::numeric_limits<float>::infinity();
    adjustment_height = 0;
    adjustment_width = 0;
    // padding_* are const members fixed to 0 at their declarations below, so
    // the output size here is computed with zero padding.
    output_height = xnn_compute_deconvolution_output_dimension(
      input_height, padding_top + padding_bottom, adjustment_height, kernel_height, dilation_height, upsampling_height);
    output_width = xnn_compute_deconvolution_output_dimension(
      input_width, padding_left + padding_right, adjustment_width, kernel_width, dilation_width, upsampling_width);

    // NOTE(review): input_dims' channel dimension omits `groups`, while the
    // input buffer below is sized with groups * group_input_channels — the
    // two disagree whenever groups > 1; confirm against the operator's
    // expected NHWC input layout.
    input_dims = {{batch_size, input_height, input_width, group_input_channels}};
    kernel_dims = {{groups * group_output_channels, kernel_height, kernel_width, group_input_channels}};
    bias_dims = {{groups * group_output_channels}};
    output_dims = {{batch_size, output_height, output_width, groups * group_output_channels}};

    // XNN_EXTRA_BYTES of slack allows kernels to over-read past the end of
    // the input buffer safely.
    input = std::vector<T>(
      XNN_EXTRA_BYTES / sizeof(T) + batch_size * input_height * input_width * groups * group_input_channels);
    kernel = std::vector<T>(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
    bias = std::vector<BiasType>(groups * group_output_channels);
    operator_output = std::vector<T>(batch_size * output_height * output_width * groups * group_output_channels);
    subgraph_output = std::vector<T>(batch_size * output_height * output_width * groups * group_output_channels);
  }

  std::unique_ptr<std::random_device> random_device;
  std::mt19937 rng;
  std::uniform_int_distribution<uint32_t> input_size_dist;
  std::uniform_int_distribution<uint32_t> kernel_size_dist;
  std::uniform_int_distribution<uint32_t> stride_dist;
  std::uniform_int_distribution<int32_t> i32dist;
  std::uniform_real_distribution<float> f32dist;
  std::uniform_real_distribution<float> scale_dist;

  // Zero padding on all four edges (const: never randomized).
  const uint32_t padding_top = 0;
  const uint32_t padding_right = 0;
  const uint32_t padding_bottom = 0;
  const uint32_t padding_left = 0;
  uint32_t batch_size;
  uint32_t input_height;
  uint32_t input_width;
  uint32_t kernel_height;
  uint32_t kernel_width;
  uint32_t upsampling_height;
  uint32_t upsampling_width;
  uint32_t adjustment_height;
  uint32_t adjustment_width;
  uint32_t dilation_height;
  uint32_t dilation_width;
  uint32_t groups;
  uint32_t group_input_channels;
  uint32_t group_output_channels;
  float output_min;
  float output_max;
  uint32_t output_height;
  uint32_t output_width;

  std::array<size_t, 4> input_dims;
  std::array<size_t, 4> kernel_dims;
  std::array<size_t, 1> bias_dims;
  std::array<size_t, 4> output_dims;

  std::vector<T> input;
  std::vector<T> kernel;
  std::vector<BiasType> bias;
  std::vector<T> operator_output;  // filled via the direct operator API
  std::vector<T> subgraph_output;  // filled via the subgraph/runtime API
};
113 
114 template <class T> class QuantizedDeconvolutionTestBase : public DeconvolutionTestBase<T, int32_t> {
115 protected:
QuantizedDeconvolutionTestBase()116   QuantizedDeconvolutionTestBase()
117   {
118     i8dist = std::uniform_int_distribution<int32_t>(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
119     w8dist = std::uniform_int_distribution<int32_t>(-std::numeric_limits<T>::max(), std::numeric_limits<T>::max());
120     std::uniform_int_distribution<int32_t> u8dist(
121       std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
122     accumulators = std::vector<int32_t>(
123       this->batch_size * this->output_height * this->output_width * this->groups * this->group_output_channels);
124   }
125 
initialize_accumulators_from_bias()126   void initialize_accumulators_from_bias()
127   {
128     for (size_t i = 0; i < this->batch_size; i++) {
129       for (size_t oy = 0; oy < this->output_height; oy++) {
130         for (size_t ox = 0; ox < this->output_width; ox++) {
131           for (size_t g = 0; g < this->groups; g++) {
132             for (size_t oc = 0; oc < this->group_output_channels; oc++) {
133               accumulators
134                 [(((i * this->output_height + oy) * this->output_width + ox) * this->groups + g) *
135                    this->group_output_channels +
136                  oc] = this->bias[g * this->group_output_channels + oc];
137             }
138           }
139         }
140       }
141     }
142   }
143 
144   std::uniform_int_distribution<int32_t> i8dist;
145   std::uniform_int_distribution<int32_t> u8dist;
146   std::uniform_int_distribution<int32_t> w8dist;
147   std::vector<int32_t> accumulators;
148 };
149 
// Concrete fixture instantiations: signed/unsigned 8-bit quantized and fp32.
using DeconvolutionTestQS8 = QuantizedDeconvolutionTestBase<int8_t>;
using DeconvolutionTestQU8 = QuantizedDeconvolutionTestBase<uint8_t>;
using DeconvolutionTestF32 = DeconvolutionTestBase<float>;
153 
// Verifies that defining a QS8 deconvolution node stores every parameter,
// tensor id, and activation bound verbatim in the subgraph node structure.
TEST_F(DeconvolutionTestQS8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  // unique_ptr guarantees the subgraph is deleted even if an ASSERT bails out.
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  // Dynamic (data == nullptr) qint8 input tensor, external id 0.
  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  // Static qint8 kernel tensor, external id 1.
  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  // Static qint32 bias tensor, external id 2.
  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  // Dynamic qint8 output tensor, external id 3.
  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  // Exactly one node must exist, and it must echo every parameter passed in.
  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_deconvolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qs8);
  ASSERT_EQ(node->params.deconvolution_2d.padding_top, padding_top);
  ASSERT_EQ(node->params.deconvolution_2d.padding_right, padding_right);
  ASSERT_EQ(node->params.deconvolution_2d.padding_bottom, padding_bottom);
  ASSERT_EQ(node->params.deconvolution_2d.padding_left, padding_left);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_height, upsampling_height);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_width, upsampling_width);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_height, adjustment_height);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_width, adjustment_width);
  ASSERT_EQ(node->params.deconvolution_2d.groups, groups);
  ASSERT_EQ(node->params.deconvolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.deconvolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}
225 
// Verifies that defining a QU8 deconvolution node stores every parameter,
// tensor id, and activation bound verbatim in the subgraph node structure.
TEST_F(DeconvolutionTestQU8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  // unique_ptr guarantees the subgraph is deleted even if an ASSERT bails out.
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  // Dynamic (data == nullptr) quint8 input tensor, external id 0.
  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  // Static quint8 kernel tensor, external id 1.
  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  // Static qint32 bias tensor, external id 2.
  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  // Dynamic quint8 output tensor, external id 3.
  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_deconvolution_2d(
      subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
      kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
      /*flags=*/0));

  // Exactly one node must exist, and it must echo every parameter passed in.
  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_deconvolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
  ASSERT_EQ(node->params.deconvolution_2d.padding_top, padding_top);
  ASSERT_EQ(node->params.deconvolution_2d.padding_right, padding_right);
  ASSERT_EQ(node->params.deconvolution_2d.padding_bottom, padding_bottom);
  ASSERT_EQ(node->params.deconvolution_2d.padding_left, padding_left);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.deconvolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_height, upsampling_height);
  ASSERT_EQ(node->params.deconvolution_2d.upsampling_width, upsampling_width);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.deconvolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_height, adjustment_height);
  ASSERT_EQ(node->params.deconvolution_2d.adjustment_width, adjustment_width);
  ASSERT_EQ(node->params.deconvolution_2d.groups, groups);
  ASSERT_EQ(node->params.deconvolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.deconvolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}
297 
TEST_F(DeconvolutionTestF32,define)298 TEST_F(DeconvolutionTestF32, define)
299 {
300   ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
301 
302   xnn_subgraph_t subgraph = nullptr;
303   ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
304   std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
305 
306   uint32_t input_id = XNN_INVALID_NODE_ID;
307   ASSERT_EQ(
308     xnn_status_success, xnn_define_tensor_value(
309                           subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
310                           /*external_id=*/0, /*flags=*/0, &input_id));
311   ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
312 
313   uint32_t kernel_id = XNN_INVALID_NODE_ID;
314   ASSERT_EQ(
315     xnn_status_success,
316     xnn_define_tensor_value(
317       subgraph, xnn_datatype_fp32, kernel_dims.size(), kernel_dims.data(), kernel.data(), /*external_id=*/1,
318       /*flags=*/0, &kernel_id));
319 
320   uint32_t bias_id = XNN_INVALID_NODE_ID;
321   ASSERT_EQ(
322     xnn_status_success, xnn_define_tensor_value(
323                           subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
324                           /*external_id=*/2, /*flags=*/0, &bias_id));
325 
326   uint32_t output_id = XNN_INVALID_NODE_ID;
327   ASSERT_EQ(
328     xnn_status_success, xnn_define_tensor_value(
329                           subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
330                           /*external_id=*/3, /*flags=*/0, &output_id));
331   ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
332 
333   ASSERT_EQ(
334     xnn_status_success,
335     xnn_define_deconvolution_2d(
336       subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
337       kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
338       group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
339       /*flags=*/0));
340 
341   ASSERT_EQ(subgraph->num_nodes, 1);
342   const struct xnn_node* node = &subgraph->nodes[0];
343   ASSERT_EQ(node->type, xnn_node_type_deconvolution_2d);
344   ASSERT_EQ(node->compute_type, xnn_compute_type_fp32);
345   ASSERT_EQ(node->params.deconvolution_2d.padding_top, padding_top);
346   ASSERT_EQ(node->params.deconvolution_2d.padding_right, padding_right);
347   ASSERT_EQ(node->params.deconvolution_2d.padding_bottom, padding_bottom);
348   ASSERT_EQ(node->params.deconvolution_2d.padding_left, padding_left);
349   ASSERT_EQ(node->params.deconvolution_2d.kernel_height, kernel_height);
350   ASSERT_EQ(node->params.deconvolution_2d.kernel_width, kernel_width);
351   ASSERT_EQ(node->params.deconvolution_2d.upsampling_height, upsampling_height);
352   ASSERT_EQ(node->params.deconvolution_2d.upsampling_width, upsampling_width);
353   ASSERT_EQ(node->params.deconvolution_2d.dilation_height, dilation_height);
354   ASSERT_EQ(node->params.deconvolution_2d.dilation_width, dilation_width);
355   ASSERT_EQ(node->params.deconvolution_2d.adjustment_height, adjustment_height);
356   ASSERT_EQ(node->params.deconvolution_2d.adjustment_width, adjustment_width);
357   ASSERT_EQ(node->params.deconvolution_2d.groups, groups);
358   ASSERT_EQ(node->params.deconvolution_2d.group_input_channels, group_input_channels);
359   ASSERT_EQ(node->params.deconvolution_2d.group_output_channels, group_output_channels);
360   ASSERT_EQ(node->activation.output_min, output_min);
361   ASSERT_EQ(node->activation.output_max, output_max);
362   ASSERT_EQ(node->num_inputs, 3);
363   ASSERT_EQ(node->inputs[0], input_id);
364   ASSERT_EQ(node->inputs[1], kernel_id);
365   ASSERT_EQ(node->inputs[2], bias_id);
366   ASSERT_EQ(node->num_outputs, 1);
367   ASSERT_EQ(node->outputs[0], output_id);
368   ASSERT_EQ(node->flags, 0);
369 }
370 
TEST_F(DeconvolutionTestQS8,matches_operator_api)371 TEST_F(DeconvolutionTestQS8, matches_operator_api)
372 {
373   ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
374 
375   xnn_operator_t op = nullptr;
376 
377   std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
378   std::generate(kernel.begin(), kernel.end(), [&]() { return w8dist(rng); });
379   std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
380   std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
381   std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));
382   const int8_t input_zero_point = 1;
383   const float input_scale = scale_dist(rng);
384   const float kernel_scale = scale_dist(rng);
385 
386   for (size_t i = 0; i < batch_size; i++) {
387     for (size_t oy = 0; oy < output_height; oy++) {
388       for (size_t ox = 0; ox < output_width; ox++) {
389         for (size_t ky = 0; ky < kernel_height; ky++) {
390           const size_t y = oy + padding_top - ky * dilation_height;
391           const size_t iy = y / upsampling_height;
392           if (iy * upsampling_height == y && iy < input_height) {
393             for (size_t kx = 0; kx < kernel_width; kx++) {
394               const size_t x = ox + padding_left - kx * dilation_width;
395               const size_t ix = x / upsampling_width;
396               if (ix * upsampling_width == x && ix < input_width) {
397                 for (size_t g = 0; g < groups; g++) {
398                   for (size_t oc = 0; oc < group_output_channels; oc++) {
399                     for (size_t ic = 0; ic < group_input_channels; ic++) {
400                       accumulators
401                         [(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
402                         (int32_t(input[((i * input_height + iy) * input_width + ix) * g * group_input_channels + ic]) -
403                          int32_t(input_zero_point)) *
404                         int32_t(kernel
405                                   [(((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) *
406                                      group_input_channels +
407                                    ic]);
408                     }
409                   }
410                 }
411               }
412             }
413           }
414         }
415       }
416     }
417   }
418 
419   // Compute renormalization parameters.
420   const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
421   const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
422 
423   float output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
424   int8_t output_zero_point = int8_t(std::max(
425     std::min(
426       lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
427       long(std::numeric_limits<int8_t>::max())),
428     long(std::numeric_limits<int8_t>::min())));
429   const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
430   const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);
431 
432   // Call operator API.
433   const xnn_status status = xnn_create_deconvolution2d_nhwc_qs8(
434     padding_top, padding_right, padding_bottom, padding_left, kernel_height, kernel_width, upsampling_height,
435     upsampling_width, dilation_height, dilation_width, groups, group_input_channels, group_output_channels,
436     groups * group_input_channels, groups * group_output_channels, input_zero_point, input_scale, kernel_scale,
437     kernel.data(), bias.data(), output_zero_point, output_scale, quantized_output_min, quantized_output_max,
438     /*flags=*/0, nullptr, &op);
439   std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);
440 
441   if (status == xnn_status_unsupported_hardware) {
442     GTEST_SKIP();
443   }
444 
445   ASSERT_EQ(xnn_status_success, status);
446   ASSERT_NE(nullptr, op);
447   ASSERT_EQ(
448     xnn_status_success, xnn_setup_deconvolution2d_nhwc_qs8(
449                           op, batch_size, input_height, input_width, adjustment_height, adjustment_width, input.data(),
450                           operator_output.data(),
451                           /*threadpool=*/nullptr));
452 
453   ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));
454 
455   // Call subgraph API.
456   xnn_subgraph_t subgraph = nullptr;
457   ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
458   std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
459 
460   uint32_t input_id = XNN_INVALID_NODE_ID;
461   ASSERT_EQ(
462     xnn_status_success, xnn_define_quantized_tensor_value(
463                           subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(),
464                           input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
465   ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
466 
467   uint32_t kernel_id = XNN_INVALID_NODE_ID;
468   ASSERT_EQ(
469     xnn_status_success, xnn_define_quantized_tensor_value(
470                           subgraph, xnn_datatype_qint8, 0, kernel_scale, kernel_dims.size(), kernel_dims.data(),
471                           kernel.data(), /*external_id=*/1, /*flags=*/0, &kernel_id));
472 
473   uint32_t bias_id = XNN_INVALID_NODE_ID;
474   ASSERT_EQ(
475     xnn_status_success, xnn_define_quantized_tensor_value(
476                           subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
477                           bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));
478 
479   uint32_t output_id = XNN_INVALID_NODE_ID;
480   ASSERT_EQ(
481     xnn_status_success, xnn_define_quantized_tensor_value(
482                           subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(),
483                           output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
484   ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
485   ASSERT_EQ(
486     xnn_status_success,
487     xnn_define_deconvolution_2d(
488       subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
489       kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
490       group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
491       /*flags=*/0));
492 
493   xnn_runtime_t runtime = nullptr;
494   ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
495   ASSERT_NE(nullptr, runtime);
496   std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
497   std::array<xnn_external_value, 2> external = {
498     xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
499   ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
500   ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));
501 
502   // Check outputs match.
503   for (size_t i = 0; i < operator_output.size(); i++) {
504     ASSERT_EQ(subgraph_output[i], operator_output[i]);
505   }
506 }
507 
TEST_F(DeconvolutionTestQU8,matches_operator_api)508 TEST_F(DeconvolutionTestQU8, matches_operator_api)
509 {
510   ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
511 
512   xnn_operator_t op = nullptr;
513 
514   std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
515   std::generate(kernel.begin(), kernel.end(), [&]() { return u8dist(rng); });
516   std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
517   std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0xA5));
518   std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0xA5));
519   const uint8_t input_zero_point = u8dist(rng);
520   const uint8_t kernel_zero_point = 0;
521   const float input_scale = scale_dist(rng);
522   const float kernel_scale = scale_dist(rng);
523 
524   // Compute reference results, without renormalization.
525   initialize_accumulators_from_bias();
526   for (size_t i = 0; i < batch_size; i++) {
527     for (size_t oy = 0; oy < output_height; oy++) {
528       for (size_t ox = 0; ox < output_width; ox++) {
529         for (size_t ky = 0; ky < kernel_height; ky++) {
530           const size_t y = oy + padding_top - ky * dilation_height;
531           const size_t iy = y / upsampling_height;
532           if (iy * upsampling_height == y && iy < input_height) {
533             for (size_t kx = 0; kx < kernel_width; kx++) {
534               const size_t x = ox + padding_left - kx * dilation_width;
535               const size_t ix = x / upsampling_width;
536               if (ix * upsampling_width == x && ix < input_width) {
537                 for (size_t g = 0; g < groups; g++) {
538                   for (size_t oc = 0; oc < group_output_channels; oc++) {
539                     for (size_t ic = 0; ic < group_input_channels; ic++) {
540                       accumulators
541                         [(((i * output_height + oy) * output_width + ox) * groups + g) * group_output_channels + oc] +=
542                         (int32_t(input[((i * input_height + iy) * input_width + ix) * g * group_input_channels + ic]) -
543                          int32_t(input_zero_point)) *
544                         (int32_t(kernel
545                                    [(((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) *
546                                       group_input_channels +
547                                     ic]) -
548                          int32_t(kernel_zero_point));
549                     }
550                   }
551                 }
552               }
553             }
554           }
555         }
556       }
557     }
558   }
559 
560   // Compute renormalization parameters.
561   const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
562   const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
563 
564   const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
565   const uint8_t output_zero_point = uint8_t(std::max(
566     std::min(
567       lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
568       long(std::numeric_limits<uint8_t>::max())),
569     long(std::numeric_limits<uint8_t>::min())));
570   const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point);
571   const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point);
572 
573   // Call operator API.
574   const xnn_status status = xnn_create_deconvolution2d_nhwc_qu8(
575     padding_top, padding_right, padding_bottom, padding_left, kernel_height, kernel_width, upsampling_height,
576     upsampling_width, dilation_height, dilation_width, groups, group_input_channels, group_output_channels,
577     groups * group_input_channels, groups * group_output_channels, input_zero_point, input_scale, kernel_zero_point,
578     kernel_scale, kernel.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
579     quantized_output_max, /*flags=*/0, nullptr, &op);
580   std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);
581 
582   if (status == xnn_status_unsupported_hardware) {
583     GTEST_SKIP();
584   }
585 
586   ASSERT_EQ(xnn_status_success, status);
587   ASSERT_NE(nullptr, op);
588   ASSERT_EQ(
589     xnn_status_success, xnn_setup_deconvolution2d_nhwc_qu8(
590                           op, batch_size, input_height, input_width, adjustment_height, adjustment_width, input.data(),
591                           operator_output.data(),
592                           /*threadpool=*/nullptr));
593 
594   ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));
595 
596   // Call subgraph API.
597   xnn_subgraph_t subgraph = nullptr;
598   ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
599   std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
600 
601   uint32_t input_id = XNN_INVALID_NODE_ID;
602   ASSERT_EQ(
603     xnn_status_success, xnn_define_quantized_tensor_value(
604                           subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(),
605                           input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
606   ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
607 
608   uint32_t kernel_id = XNN_INVALID_NODE_ID;
609   ASSERT_EQ(
610     xnn_status_success, xnn_define_quantized_tensor_value(
611                           subgraph, xnn_datatype_quint8, 0, kernel_scale, kernel_dims.size(), kernel_dims.data(),
612                           kernel.data(), /*external_id=*/1, /*flags=*/0, &kernel_id));
613 
614   uint32_t bias_id = XNN_INVALID_NODE_ID;
615   ASSERT_EQ(
616     xnn_status_success, xnn_define_quantized_tensor_value(
617                           subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
618                           bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));
619 
620   uint32_t output_id = XNN_INVALID_NODE_ID;
621   ASSERT_EQ(
622     xnn_status_success, xnn_define_quantized_tensor_value(
623                           subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(),
624                           output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
625   ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
626   ASSERT_EQ(
627     xnn_status_success,
628     xnn_define_deconvolution_2d(
629       subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
630       kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
631       group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
632       /*flags=*/0));
633 
634   xnn_runtime_t runtime = nullptr;
635   ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
636   ASSERT_NE(nullptr, runtime);
637   std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
638   std::array<xnn_external_value, 2> external = {
639     xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
640   ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
641   ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));
642 
643   // Check outputs match.
644   for (size_t i = 0; i < operator_output.size(); i++) {
645     ASSERT_EQ(subgraph_output[i], operator_output[i]);
646   }
647 }
648 
TEST_F(DeconvolutionTestF32,matches_operator_api)649 TEST_F(DeconvolutionTestF32, matches_operator_api)
650 {
651   ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
652 
653   xnn_operator_t op = nullptr;
654 
655   std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
656   std::generate(kernel.begin(), kernel.end(), [&]() { return f32dist(rng); });
657   std::generate(bias.begin(), bias.end(), [&]() { return f32dist(rng); });
658   std::fill(operator_output.begin(), operator_output.end(), nanf(""));
659   std::fill(subgraph_output.begin(), subgraph_output.end(), nanf(""));
660 
661   // Call operator API.
662   const xnn_status status = xnn_create_deconvolution2d_nhwc_f32(
663     padding_top, padding_right, padding_bottom, padding_left, kernel_height, kernel_width, upsampling_height,
664     upsampling_width, dilation_height, dilation_width, groups, group_input_channels, group_output_channels,
665     groups * group_input_channels, groups * group_output_channels, kernel.data(), bias.data(), output_min, output_max,
666     /*flags=*/0, nullptr, &op);
667   std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);
668 
669   if (status == xnn_status_unsupported_hardware) {
670     GTEST_SKIP();
671   }
672 
673   ASSERT_EQ(xnn_status_success, status);
674   ASSERT_NE(nullptr, op);
675   ASSERT_EQ(
676     xnn_status_success, xnn_setup_deconvolution2d_nhwc_f32(
677                           op, batch_size, input_height, input_width, adjustment_height, adjustment_width, input.data(),
678                           operator_output.data(),
679                           /*threadpool=*/nullptr));
680 
681   ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));
682 
683   // Call subgraph API.
684   xnn_subgraph_t subgraph = nullptr;
685   ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
686   std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);
687 
688   uint32_t input_id = XNN_INVALID_NODE_ID;
689   ASSERT_EQ(
690     xnn_status_success, xnn_define_tensor_value(
691                           subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
692                           /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
693   ASSERT_NE(input_id, XNN_INVALID_NODE_ID);
694 
695   uint32_t kernel_id = XNN_INVALID_NODE_ID;
696   ASSERT_EQ(
697     xnn_status_success, xnn_define_tensor_value(
698                           subgraph, xnn_datatype_fp32, kernel_dims.size(), kernel_dims.data(), kernel.data(),
699                           /*external_id=*/1, /*flags=*/0, &kernel_id));
700 
701   uint32_t bias_id = XNN_INVALID_NODE_ID;
702   ASSERT_EQ(
703     xnn_status_success, xnn_define_tensor_value(
704                           subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
705                           /*external_id=*/2, /*flags=*/0, &bias_id));
706 
707   uint32_t output_id = XNN_INVALID_NODE_ID;
708   ASSERT_EQ(
709     xnn_status_success, xnn_define_tensor_value(
710                           subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
711                           /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
712   ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
713   ASSERT_EQ(
714     xnn_status_success,
715     xnn_define_deconvolution_2d(
716       subgraph, padding_top, padding_right, padding_bottom, padding_left, adjustment_height, adjustment_width,
717       kernel_height, kernel_width, upsampling_height, upsampling_width, dilation_height, dilation_width, groups,
718       group_input_channels, group_output_channels, output_min, output_max, input_id, kernel_id, bias_id, output_id,
719       /*flags=*/0));
720 
721   xnn_runtime_t runtime = nullptr;
722   ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
723   ASSERT_NE(nullptr, runtime);
724   std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
725   std::array<xnn_external_value, 2> external = {
726     xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
727   ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
728   ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));
729 
730   // Check outputs match.
731   for (size_t i = 0; i < operator_output.size(); i++) {
732     ASSERT_EQ(subgraph_output[i], operator_output[i]);
733   }
734 }
735