// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <type_traits>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/operator.h>
#include <xnnpack/requantization.h>
#include <xnnpack/subgraph.h>

#include "convolution-test-helpers.h"
#include <gtest/gtest.h>

namespace xnnpack {

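// Test fixture for depthwise convolution subgraph tests. It randomizes all
// convolution parameters (shapes, strides, dilations, padding, depth
// multiplier) and allocates input/filter/bias/output buffers sized for the
// resulting operator.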
template <class T, class BiasType = T> class DepthwiseConvolutionTestBase : public ::testing::Test {
protected:
  DepthwiseConvolutionTestBase()
  {
    random_device = std::unique_ptr<std::random_device>(new std::random_device());
    rng = std::mt19937((*random_device)());
    input_size_dist = std::uniform_int_distribution<uint32_t>(10, 15);
    kernel_size_dist = std::uniform_int_distribution<uint32_t>(1, 5);
    stride_dist = std::uniform_int_distribution<uint32_t>(1, 2);
    f32dist = std::uniform_real_distribution<float>(0.1f, 1.0f);
    i32dist = std::uniform_int_distribution<int32_t>(-10000, 10000);

    batch_size = input_size_dist(rng);
    input_height = input_size_dist(rng);
    input_width = input_size_dist(rng);
    input_channels = input_size_dist(rng);
    kernel_height = kernel_size_dist(rng);
    kernel_width = kernel_size_dist(rng);
    subsampling_height = stride_dist(rng);
    subsampling_width = stride_dist(rng);
    depth_multiplier = kernel_size_dist(rng);
    dilation_height = stride_dist(rng);
    dilation_width = stride_dist(rng);
    input_padding_top = kernel_size_dist(rng);
    input_padding_right = kernel_size_dist(rng);
    input_padding_bottom = kernel_size_dist(rng);
    input_padding_left = kernel_size_dist(rng);
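    // xnn_compute_convolution_output_dimension applies the standard formula
    //   output = (padded_input - (kernel - 1) * dilation - 1) / stride + 1,
    // e.g. padded_input = 12, kernel = 3, dilation = 1, stride = 2 gives
    // (12 - 3) / 2 + 1 = 5.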
    output_height = xnn_compute_convolution_output_dimension(
      input_padding_top + input_height + input_padding_bottom, kernel_height, dilation_height, subsampling_height);
    output_width = xnn_compute_convolution_output_dimension(
      input_padding_left + input_width + input_padding_right, kernel_width, dilation_width, subsampling_width);
    output_channels = input_channels * depth_multiplier;
    output_min = -std::numeric_limits<float>::infinity();
    output_max = std::numeric_limits<float>::infinity();

    input_dims = {{batch_size, input_height, input_width, input_channels}};
    filter_dims = {{1, kernel_height, kernel_width, output_channels}};
    bias_dims = {{output_channels}};
    output_dims = {{batch_size, output_height, output_width, output_channels}};

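    // The input buffer is over-allocated by XNN_EXTRA_BYTES: XNNPACK
    // micro-kernels may read (but never use) a few bytes past the end of
    // their input.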
    input = std::vector<T>(XNN_EXTRA_BYTES / sizeof(T) + batch_size * input_height * input_width * input_channels);
    filter = std::vector<T>(batch_size * kernel_height * kernel_width * output_channels);
    bias = std::vector<BiasType>(output_channels);
    operator_output = std::vector<T>(batch_size * output_height * output_width * output_channels);
    subgraph_output = std::vector<T>(batch_size * output_height * output_width * output_channels);
  }

  std::unique_ptr<std::random_device> random_device;
  std::mt19937 rng;
  std::uniform_int_distribution<uint32_t> input_size_dist;
  std::uniform_int_distribution<uint32_t> kernel_size_dist;
  std::uniform_int_distribution<uint32_t> stride_dist;
  std::uniform_int_distribution<int32_t> i32dist;
  std::uniform_real_distribution<float> f32dist;

  uint32_t input_padding_top;
  uint32_t input_padding_right;
  uint32_t input_padding_bottom;
  uint32_t input_padding_left;
  uint32_t batch_size;
  uint32_t input_height;
  uint32_t input_width;
  uint32_t kernel_height;
  uint32_t kernel_width;
  uint32_t subsampling_height;
  uint32_t subsampling_width;
  uint32_t dilation_height;
  uint32_t dilation_width;
  uint32_t depth_multiplier;
  uint32_t input_channels;
  uint32_t output_channels;
  float output_min;
  float output_max;
  uint32_t output_height;
  uint32_t output_width;

  std::array<size_t, 4> input_dims;
  std::array<size_t, 4> filter_dims;
  std::array<size_t, 1> bias_dims;
  std::array<size_t, 4> output_dims;

  std::vector<T> input;
  std::vector<T> filter;
  std::vector<BiasType> bias;
  std::vector<T> operator_output;
  std::vector<T> subgraph_output;
};

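// Extends the base fixture with quantization parameters (scales, zero points)
// and an accumulator buffer used to derive requantization parameters from
// reference results. All quantized variants use int32_t biases.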
template <class T> class QuantizedDepthwiseConvolutionTestBase : public DepthwiseConvolutionTestBase<T, int32_t> {
protected:
  QuantizedDepthwiseConvolutionTestBase()
  {
    i8dist = std::uniform_int_distribution<int32_t>(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
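    // Weights use a symmetric range [-max, max] that excludes
    // numeric_limits<T>::min() (e.g. -128 for int8), presumably to sidestep
    // the asymmetric extreme value as a weight corner case.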
    w8dist = std::uniform_int_distribution<int32_t>(-std::numeric_limits<T>::max(), std::numeric_limits<T>::max());
    u8dist = std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
    accumulators = std::vector<int32_t>(
      this->batch_size * this->output_height * this->output_width * this->input_channels * this->depth_multiplier);
    scale_dist = std::uniform_real_distribution<float>(1.0f, 5.0f);

    input_scale = scale_dist(this->rng);
    kernel_scale = scale_dist(this->rng);
    if (std::is_same<T, int8_t>::value) {
      input_zero_point = i8dist(this->rng);
      kernel_zero_point = i8dist(this->rng);
    }
    else {
      input_zero_point = u8dist(this->rng);
      kernel_zero_point = 0;
    }
  }

  std::uniform_int_distribution<int32_t> i8dist;
  std::uniform_int_distribution<int32_t> u8dist;
  std::uniform_int_distribution<int32_t> w8dist;
  std::uniform_real_distribution<float> scale_dist;
  std::vector<int32_t> accumulators;

  float input_scale;
  float kernel_scale;
  float output_scale = 1.0f;

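  // Zero points are stored in the tensor's own element type: uint8_t for QU8,
  // int8_t for QS8/QC8.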
  typedef typename std::conditional<std::is_same<T, uint8_t>::value, uint8_t, int8_t>::type ZeroPointType;
  ZeroPointType input_zero_point;
  ZeroPointType kernel_zero_point;
  ZeroPointType output_zero_point = 0;
};

using DepthwiseConvolutionTestQC8 = QuantizedDepthwiseConvolutionTestBase<int8_t>;
using DepthwiseConvolutionTestQS8 = QuantizedDepthwiseConvolutionTestBase<int8_t>;
using DepthwiseConvolutionTestQU8 = QuantizedDepthwiseConvolutionTestBase<uint8_t>;
using DepthwiseConvolutionTestF32 = DepthwiseConvolutionTestBase<float>;

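// The define tests build a single-node subgraph and verify that the node
// records exactly the parameters passed to xnn_define_depthwise_convolution_2d.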
TEST_F(DepthwiseConvolutionTestQC8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
  std::vector<float> requantization_scales(input_channels * depth_multiplier, 1.0f);

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_channelwise_quantized_tensor_value(
                          subgraph, xnn_datatype_qcint8, requantization_scales.data(), filter_dims.size(), 3,
                          filter_dims.data(), filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_channelwise_quantized_tensor_value(
      subgraph, xnn_datatype_qcint32, requantization_scales.data(), bias_dims.size(), 0, bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qc8);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthwiseConvolutionTestQS8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qs8);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthwiseConvolutionTestQU8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthwiseConvolutionTestF32, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_tensor_value(
      subgraph, xnn_datatype_fp32, filter_dims.size(), filter_dims.data(), filter.data(), /*external_id=*/1,
      /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_fp32);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

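// The matches_operator_api tests run the same randomized convolution twice,
// once through the operator API and once through a subgraph runtime, and
// require bit-identical outputs.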
TEST_F(DepthwiseConvolutionTestQC8, matches_operator_api)
{
  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));
  std::vector<float> requantization_scales(input_channels * depth_multiplier);
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);

  // Compute reference results, without renormalization.
  compute_depthwise_convolution_qs8_reference_results(
      batch_size,
      output_height,
      output_width,
      input_height,
      input_width,
      input_padding_top,
      input_padding_right,
      input_padding_bottom,
      input_padding_left,
      kernel_height,
      kernel_width,
      subsampling_height,
      subsampling_width,
      dilation_height,
      dilation_width,
      input_channels,
      depth_multiplier,
      input_zero_point,
      input,
      filter,
      accumulators,
      /*has_bias=*/true,
      bias);

  // Compute renormalization parameters.
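  // For each channel, choose the largest requantization scale that still maps
  // the observed accumulator range onto [int8_min, int8_max] around the output
  // zero point, clamped to the valid range [2^-32, 0x1.FFFFFEp-1].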
  for (size_t c = 0; c < input_channels * depth_multiplier; c++) {
    int32_t accumulated_min = accumulators[c];
    int32_t accumulated_max = accumulators[c];
    for (size_t px = 0; px < batch_size * output_height * output_width; px++) {
      accumulated_min = std::min(accumulated_min, accumulators[px * input_channels * depth_multiplier + c]);
      accumulated_max = std::max(accumulated_max, accumulators[px * input_channels * depth_multiplier + c]);
    }

    float requantization_scale = 0x1.0p-32f;
    if (accumulated_max != 0) {
      requantization_scale = std::max(
        requantization_scale,
        float(int32_t(std::numeric_limits<int8_t>::max()) - int32_t(output_zero_point)) / float(accumulated_max));
    }
    if (accumulated_min != 0) {
      requantization_scale = std::max(
        requantization_scale,
        float(int32_t(std::numeric_limits<int8_t>::min()) - int32_t(output_zero_point)) / float(accumulated_min));
    }
    requantization_scale = std::min(requantization_scale, 0x1.FFFFFEp-1f);

    requantization_scales[c] = requantization_scale;
  }

  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
  xnn_operator_t op = nullptr;

  // Call operator API.
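  // A depthwise convolution maps onto the generic convolution operator as
  // groups = input_channels, with one input channel and depth_multiplier
  // output channels per group; XNN_FLAG_DEPTHWISE_CONVOLUTION selects the
  // depthwise (HWGo) filter layout.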
  const xnn_status status = xnn_create_convolution2d_nhwc_qc8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, input_zero_point,
    input_scale, requantization_scales.data(), filter.data(), bias.data(), output_zero_point, output_scale,
    quantized_output_min, quantized_output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qc8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_channelwise_quantized_tensor_value(
                          subgraph, xnn_datatype_qcint8, requantization_scales.data(), filter_dims.size(), 3,
                          filter_dims.data(), filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_channelwise_quantized_tensor_value(
      subgraph, xnn_datatype_qcint32, requantization_scales.data(), bias_dims.size(), 0, bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthwiseConvolutionTestQS8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));

  compute_convolution_qs8_reference_results(
      batch_size,
      output_height,
      output_width,
      input_height,
      input_width,
      input_padding_top,
      input_padding_right,
      input_padding_bottom,
      input_padding_left,
      kernel_height,
      kernel_width,
      subsampling_height,
      subsampling_width,
      dilation_height,
      dilation_width,
      /*groups=*/input_channels,
      /*group_input_channels=*/1,
      /*group_output_channels=*/depth_multiplier,
      input_zero_point,
      input,
      filter,
      accumulators,
      /*has_bias=*/true,
      bias);

  // Compute renormalization parameters.
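  // Spread the observed accumulator range over the 255 quantization steps and
  // pick a zero point that centers that range within [int8_min, int8_max].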
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const float output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const int8_t output_zero_point = int8_t(std::max(
    std::min(
      lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<int8_t>::max())),
    long(std::numeric_limits<int8_t>::min())));
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_qs8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, input_zero_point,
    input_scale, kernel_scale, filter.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
    quantized_output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qs8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, kernel_zero_point, kernel_scale, filter_dims.size(),
                          filter_dims.data(), filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthwiseConvolutionTestQU8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return u8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0xA5));

  // Compute reference results, without renormalization.
  compute_convolution_qu8_reference_results(
      batch_size,
      output_height,
      output_width,
      input_height,
      input_width,
      input_padding_top,
      input_padding_right,
      input_padding_bottom,
      input_padding_left,
      kernel_height,
      kernel_width,
      subsampling_height,
      subsampling_width,
      dilation_height,
      dilation_width,
      /*groups=*/input_channels,
      /*group_input_channels=*/1,
      /*group_output_channels=*/depth_multiplier,
      input_zero_point,
      kernel_zero_point,
      input,
      filter,
      accumulators,
      /*has_bias=*/true,
      bias);

  // Compute renormalization parameters.
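  // Same derivation as in the QS8 test, but centered around 127.5, the
  // midpoint of the unsigned uint8 range.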
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const uint8_t output_zero_point = uint8_t(std::max(
    std::min(
      lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<uint8_t>::max())),
    long(std::numeric_limits<uint8_t>::min())));
  const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point);
  const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_qu8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, input_zero_point,
    input_scale, kernel_zero_point, kernel_scale, filter.data(), bias.data(), output_zero_point, output_scale,
    quantized_output_min, quantized_output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qu8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthwiseConvolutionTestF32, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return f32dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return f32dist(rng); });
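  // Pre-fill both output buffers with NaN so that any element the convolution
  // fails to write is caught by the equality check below.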
  std::fill(operator_output.begin(), operator_output.end(), nanf(""));
  std::fill(subgraph_output.begin(), subgraph_output.end(), nanf(""));

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_f32(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, filter.data(),
    bias.data(), output_min, output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_f32(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, filter_dims.size(), filter_dims.data(), filter.data(),
                          /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}
}  // namespace xnnpack