// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <type_traits>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/operator.h>
#include <xnnpack/requantization.h>
#include <xnnpack/subgraph.h>

#include "convolution-test-helpers.h"
#include <gtest/gtest.h>

namespace xnnpack {

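// Test fixture for depthwise convolution subgraph tests. The constructor
// draws random tensor shapes and convolution parameters, derives the output
// dimensions via xnn_compute_convolution_output_dimension, and allocates
// input, filter, bias, and output buffers to match. Two output buffers are
// kept so that results produced through the operator API and through the
// subgraph API can be compared element-for-element.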
template <class T, class BiasType = T> class DepthwiseConvolutionTestBase : public ::testing::Test {
 protected:
  DepthwiseConvolutionTestBase()
  {
    random_device = std::unique_ptr<std::random_device>(new std::random_device());
    rng = std::mt19937((*random_device)());
    input_size_dist = std::uniform_int_distribution<uint32_t>(10, 15);
    kernel_size_dist = std::uniform_int_distribution<uint32_t>(1, 5);
    stride_dist = std::uniform_int_distribution<uint32_t>(1, 2);
    f32dist = std::uniform_real_distribution<float>(0.1f, 1.0f);
    i32dist = std::uniform_int_distribution<int32_t>(-10000, 10000);

    batch_size = input_size_dist(rng);
    input_height = input_size_dist(rng);
    input_width = input_size_dist(rng);
    input_channels = input_size_dist(rng);
    kernel_height = kernel_size_dist(rng);
    kernel_width = kernel_size_dist(rng);
    subsampling_height = stride_dist(rng);
    subsampling_width = stride_dist(rng);
    depth_multiplier = kernel_size_dist(rng);
    dilation_height = stride_dist(rng);
    dilation_width = stride_dist(rng);
    input_padding_top = kernel_size_dist(rng);
    input_padding_right = kernel_size_dist(rng);
    input_padding_bottom = kernel_size_dist(rng);
    input_padding_left = kernel_size_dist(rng);
    output_height = xnn_compute_convolution_output_dimension(
      input_padding_top + input_height + input_padding_bottom, kernel_height, dilation_height, subsampling_height);
    output_width = xnn_compute_convolution_output_dimension(
      input_padding_left + input_width + input_padding_right, kernel_width, dilation_width, subsampling_width);
    output_channels = input_channels * depth_multiplier;
    output_min = -std::numeric_limits<float>::infinity();
    output_max = std::numeric_limits<float>::infinity();

    input_dims = {{batch_size, input_height, input_width, input_channels}};
    filter_dims = {{1, kernel_height, kernel_width, output_channels}};
    bias_dims = {{output_channels}};
    output_dims = {{batch_size, output_height, output_width, output_channels}};

    input = std::vector<T>(XNN_EXTRA_BYTES / sizeof(T) + batch_size * input_height * input_width * input_channels);
    // The filter has dimensions {1, kernel_height, kernel_width, output_channels},
    // so its buffer does not scale with the batch size.
    filter = std::vector<T>(kernel_height * kernel_width * output_channels);
    bias = std::vector<BiasType>(output_channels);
    operator_output = std::vector<T>(batch_size * output_height * output_width * output_channels);
    subgraph_output = std::vector<T>(batch_size * output_height * output_width * output_channels);
  }

  std::unique_ptr<std::random_device> random_device;
  std::mt19937 rng;
  std::uniform_int_distribution<uint32_t> input_size_dist;
  std::uniform_int_distribution<uint32_t> kernel_size_dist;
  std::uniform_int_distribution<uint32_t> stride_dist;
  std::uniform_int_distribution<int32_t> i32dist;
  std::uniform_real_distribution<float> f32dist;

  uint32_t input_padding_top;
  uint32_t input_padding_right;
  uint32_t input_padding_bottom;
  uint32_t input_padding_left;
  uint32_t batch_size;
  uint32_t input_height;
  uint32_t input_width;
  uint32_t kernel_height;
  uint32_t kernel_width;
  uint32_t subsampling_height;
  uint32_t subsampling_width;
  uint32_t dilation_height;
  uint32_t dilation_width;
  uint32_t depth_multiplier;
  uint32_t input_channels;
  uint32_t output_channels;
  float output_min;
  float output_max;
  uint32_t output_height;
  uint32_t output_width;

  std::array<size_t, 4> input_dims;
  std::array<size_t, 4> filter_dims;
  std::array<size_t, 1> bias_dims;
  std::array<size_t, 4> output_dims;

  std::vector<T> input;
  std::vector<T> filter;
  std::vector<BiasType> bias;
  std::vector<T> operator_output;
  std::vector<T> subgraph_output;
};

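// Extends the base fixture with quantization parameters. Scales are drawn
// from [1.0, 5.0]. For signed (int8) element types both the input and kernel
// zero points are sampled from the full int8 range; for unsigned (uint8)
// element types the input zero point is sampled from the uint8 range and the
// kernel zero point is fixed at 0.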
template <class T> class QuantizedDepthwiseConvolutionTestBase : public DepthwiseConvolutionTestBase<T, int32_t> {
 protected:
  QuantizedDepthwiseConvolutionTestBase()
  {
    i8dist = std::uniform_int_distribution<int32_t>(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    w8dist = std::uniform_int_distribution<int32_t>(-std::numeric_limits<T>::max(), std::numeric_limits<T>::max());
    u8dist = std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
    accumulators = std::vector<int32_t>(
      this->batch_size * this->output_height * this->output_width * this->input_channels * this->depth_multiplier);
    scale_dist = std::uniform_real_distribution<float>(1.0f, 5.0f);

    input_scale = scale_dist(this->rng);
    kernel_scale = scale_dist(this->rng);
    if (std::is_same<T, int8_t>::value) {
      input_zero_point = i8dist(this->rng);
      kernel_zero_point = i8dist(this->rng);
    }
    else {
      input_zero_point = u8dist(this->rng);
      kernel_zero_point = 0;
    }
  }

  std::uniform_int_distribution<int32_t> i8dist;
  std::uniform_int_distribution<int32_t> u8dist;
  std::uniform_int_distribution<int32_t> w8dist;
  std::uniform_real_distribution<float> scale_dist;
  std::vector<int32_t> accumulators;

  float input_scale;
  float kernel_scale;
  float output_scale = 1.0f;

  typedef typename std::conditional<std::is_same<T, uint8_t>::value, uint8_t, int8_t>::type ZeroPointType;
  ZeroPointType input_zero_point;
  ZeroPointType kernel_zero_point;
  ZeroPointType output_zero_point = 0;
};

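// QC8 and QS8 share the same int8 fixture; the QC8 tests differ only in
// defining the filter and bias as channelwise (per-channel) quantized values.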
using DepthwiseConvolutionTestQC8 = QuantizedDepthwiseConvolutionTestBase<int8_t>;
using DepthwiseConvolutionTestQS8 = QuantizedDepthwiseConvolutionTestBase<int8_t>;
using DepthwiseConvolutionTestQU8 = QuantizedDepthwiseConvolutionTestBase<uint8_t>;
using DepthwiseConvolutionTestF32 = DepthwiseConvolutionTestBase<float>;

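// The "define" tests check that xnn_define_depthwise_convolution_2d records
// every parameter on the created subgraph node exactly as it was passed in.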
TEST_F(DepthwiseConvolutionTestQC8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
  std::vector<float> requantization_scales(input_channels * depth_multiplier, 1.0f);

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_channelwise_quantized_tensor_value(
                          subgraph, xnn_datatype_qcint8, requantization_scales.data(), filter_dims.size(), 3,
                          filter_dims.data(), filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_channelwise_quantized_tensor_value(
      subgraph, xnn_datatype_qcint32, requantization_scales.data(), bias_dims.size(), 0, bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qc8);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthwiseConvolutionTestQS8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qs8);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthwiseConvolutionTestQU8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(DepthwiseConvolutionTestF32, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_tensor_value(
      subgraph, xnn_datatype_fp32, filter_dims.size(), filter_dims.data(), filter.data(), /*external_id=*/1,
      /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_depthwise_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_fp32);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.depthwise_convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.depthwise_convolution_2d.depth_multiplier, depth_multiplier);
  ASSERT_EQ(node->params.depthwise_convolution_2d.input_channels, input_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

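// The "matches_operator_api" tests run the same randomized depthwise
// convolution twice, once through the operator API and once through a
// single-node subgraph, and require bit-exact agreement between the outputs.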
TEST_F(DepthwiseConvolutionTestQC8, matches_operator_api)
{
  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
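  // 0xA5 is a sentinel fill: any output element the convolution fails to
  // write keeps this value and breaks the exact-match assertion at the end.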
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));
  std::vector<float> requantization_scales(input_channels * depth_multiplier);
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);

  // Compute reference results, without renormalization.
  compute_depthwise_convolution_qs8_reference_results(
    batch_size,
    output_height,
    output_width,
    input_height,
    input_width,
    input_padding_top,
    input_padding_right,
    input_padding_bottom,
    input_padding_left,
    kernel_height,
    kernel_width,
    subsampling_height,
    subsampling_width,
    dilation_height,
    dilation_width,
    input_channels,
    depth_multiplier,
    input_zero_point,
    input,
    filter,
    accumulators,
    /*has_bias=*/true,
    bias);

  // Compute renormalization parameters.
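  // For each output channel, derive a requantization scale from that
  // channel's extreme accumulators so the quantized outputs land in the int8
  // range around output_zero_point, clamped to [0x1.0p-32, 0x1.FFFFFEp-1]
  // (the scale must stay strictly below 1.0).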
  for (size_t c = 0; c < input_channels * depth_multiplier; c++) {
    int32_t accumulated_min = accumulators[c];
    int32_t accumulated_max = accumulators[c];
    for (size_t px = 0; px < batch_size * output_height * output_width; px++) {
      accumulated_min = std::min(accumulated_min, accumulators[px * input_channels * depth_multiplier + c]);
      accumulated_max = std::max(accumulated_max, accumulators[px * input_channels * depth_multiplier + c]);
    }

    float requantization_scale = 0x1.0p-32f;
    if (accumulated_max != 0) {
      requantization_scale = std::max(
        requantization_scale,
        float(int32_t(std::numeric_limits<int8_t>::max()) - int32_t(output_zero_point)) / float(accumulated_max));
    }
    if (accumulated_min != 0) {
      requantization_scale = std::max(
        requantization_scale,
        float(int32_t(std::numeric_limits<int8_t>::min()) - int32_t(output_zero_point)) / float(accumulated_min));
    }
    requantization_scale = std::min(requantization_scale, 0x1.FFFFFEp-1f);

    requantization_scales[c] = requantization_scale;
  }

  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));
  xnn_operator_t op = nullptr;

  // Call operator API.
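  // A depthwise convolution maps onto the general convolution operator with
  // groups = input_channels, group_input_channels = 1, and
  // group_output_channels = depth_multiplier; XNN_FLAG_DEPTHWISE_CONVOLUTION
  // indicates that the filter is already in the depthwise
  // {1, kernel_height, kernel_width, output_channels} layout. The QS8, QU8,
  // and F32 tests below use the same mapping.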
  const xnn_status status = xnn_create_convolution2d_nhwc_qc8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, input_zero_point,
    input_scale, requantization_scales.data(), filter.data(), bias.data(), output_zero_point, output_scale,
    quantized_output_min, quantized_output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qc8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_channelwise_quantized_tensor_value(
                          subgraph, xnn_datatype_qcint8, requantization_scales.data(), filter_dims.size(), 3,
                          filter_dims.data(), filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_channelwise_quantized_tensor_value(
      subgraph, xnn_datatype_qcint32, requantization_scales.data(), bias_dims.size(), 0, bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthwiseConvolutionTestQS8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));

  // Compute reference results, without renormalization.
  compute_convolution_qs8_reference_results(
    batch_size,
    output_height,
    output_width,
    input_height,
    input_width,
    input_padding_top,
    input_padding_right,
    input_padding_bottom,
    input_padding_left,
    kernel_height,
    kernel_width,
    subsampling_height,
    subsampling_width,
    dilation_height,
    dilation_width,
    /*groups=*/input_channels,
    /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier,
    input_zero_point,
    input,
    filter,
    accumulators,
    /*has_bias=*/true,
    bias);

  // Compute renormalization parameters.
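  // Derive an output scale and zero point that map the observed accumulator
  // range [accumulated_min, accumulated_max] onto the 255 steps of the int8
  // output, centering the range around the zero point.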
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const int8_t output_zero_point = int8_t(std::max(
    std::min(
      lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<int8_t>::max())),
    long(std::numeric_limits<int8_t>::min())));
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_qs8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, input_zero_point,
    input_scale, kernel_scale, filter.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
    quantized_output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qs8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, kernel_zero_point, kernel_scale, filter_dims.size(),
                          filter_dims.data(), filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthwiseConvolutionTestQU8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return u8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0xA5));

  // Compute reference results, without renormalization.
  compute_convolution_qu8_reference_results(
    batch_size,
    output_height,
    output_width,
    input_height,
    input_width,
    input_padding_top,
    input_padding_right,
    input_padding_bottom,
    input_padding_left,
    kernel_height,
    kernel_width,
    subsampling_height,
    subsampling_width,
    dilation_height,
    dilation_width,
    /*groups=*/input_channels,
    /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier,
    input_zero_point,
    kernel_zero_point,
    input,
    filter,
    accumulators,
    /*has_bias=*/true,
    bias);

  // Compute renormalization parameters.
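  // Same renormalization as in the QS8 test, but targeting the uint8 output
  // range: the zero point is centered around 127.5 instead of -0.5.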
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const uint8_t output_zero_point = uint8_t(std::max(
    std::min(
      lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<uint8_t>::max())),
    long(std::numeric_limits<uint8_t>::min())));
  const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point);
  const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_qu8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, input_zero_point,
    input_scale, kernel_zero_point, kernel_scale, filter.data(), bias.data(), output_zero_point, output_scale,
    quantized_output_min, quantized_output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qu8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(), input_dims.data(), nullptr,
      /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1,
                          /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_quantized_tensor_value(
      subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(), output_dims.data(), nullptr,
      /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}

TEST_F(DepthwiseConvolutionTestF32, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return f32dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return f32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), nanf(""));
  std::fill(subgraph_output.begin(), subgraph_output.end(), nanf(""));

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_f32(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width,
    /*groups=*/input_channels, /*group_input_channels=*/1,
    /*group_output_channels=*/depth_multiplier, input_channels, input_channels * depth_multiplier, filter.data(),
    bias.data(), output_min, output_max,
    /*flags=*/XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_f32(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, filter_dims.size(), filter_dims.data(), filter.data(),
                          /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_depthwise_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, depth_multiplier,
      input_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  ASSERT_EQ(subgraph_output, operator_output);
}
} // namespace xnnpack