// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm> // For std::generate, std::fill, std::min, std::max, std::min_element, std::max_element.
#include <array> // For std::array.
#include <cmath> // For lrint, nanf.
#include <cstddef> // For size_t.
#include <cstdint> // For uint32_t.
#include <limits> // For std::numeric_limits.
#include <memory> // For std::unique_ptr.
#include <random> // For std::random_device, std::mt19937, std::uniform_int_distribution, std::uniform_real_distribution.
#include <vector> // For std::vector.

#include <xnnpack.h>
#include <xnnpack/operator.h>
#include <xnnpack/requantization.h>
#include <xnnpack/subgraph.h>

#include "convolution-test-helpers.h"
#include <gtest/gtest.h>

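// The tests below cover each supported convolution datatype (QC8, QS8, QU8,
// F32). The "define" tests verify that xnn_define_convolution_2d records the
// node with the expected parameters; the "matches_operator_api" tests verify
// that the subgraph runtime produces the same output as the corresponding
// xnn_create/setup/run convolution2d_nhwc operator API calls.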
namespace xnnpack {
template <class T, class BiasType = T> class ConvolutionTestBase : public ::testing::Test {
protected:
  ConvolutionTestBase()
  {
    random_device = std::unique_ptr<std::random_device>(new std::random_device());
    rng = std::mt19937((*random_device)());
    input_size_dist = std::uniform_int_distribution<uint32_t>(10, 15);
    kernel_size_dist = std::uniform_int_distribution<uint32_t>(1, 5);
    stride_dist = std::uniform_int_distribution<uint32_t>(1, 2);
    f32dist = std::uniform_real_distribution<float>(0.1f, 1.0f);
    scale_dist = std::uniform_real_distribution<float>(1.0f, 5.0f);
    i32dist = std::uniform_int_distribution<int32_t>(-10000, 10000);

    batch_size = input_size_dist(rng);
    input_height = input_size_dist(rng);
    input_width = input_size_dist(rng);
    kernel_height = kernel_size_dist(rng);
    kernel_width = kernel_size_dist(rng);
    subsampling_height = stride_dist(rng);
    subsampling_width = subsampling_height;
    dilation_height = 1; // TODO(zhin): test other dilation values.
    dilation_width = dilation_height;
    groups = input_size_dist(rng);
    group_input_channels = input_size_dist(rng);
    group_output_channels = input_size_dist(rng);
    output_min = -std::numeric_limits<float>::infinity();
    output_max = std::numeric_limits<float>::infinity();
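    // xnn_compute_convolution_output_dimension implements the standard
    // convolution shape formula; with the zero padding used in these tests it
    // reduces to (input - (kernel - 1) * dilation - 1) / stride + 1.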
    output_height = xnn_compute_convolution_output_dimension(input_height, kernel_height, dilation_height, subsampling_height);
    output_width = xnn_compute_convolution_output_dimension(input_width, kernel_width, dilation_width, subsampling_width);

    input_dims = {{batch_size, input_height, input_width, groups * group_input_channels}};
    filter_dims = {{groups * group_output_channels, kernel_height, kernel_width, group_input_channels}};
    bias_dims = {{groups * group_output_channels}};
    output_dims = {{batch_size, output_height, output_width, groups * group_output_channels}};

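    // The input buffer is over-allocated by XNN_EXTRA_BYTES: XNNPACK
    // microkernels may read (but never use) a few bytes past the end of
    // their input.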
    input = std::vector<T>(
      XNN_EXTRA_BYTES / sizeof(T) + batch_size * input_height * input_width * groups * group_input_channels);
    filter = std::vector<T>(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
    bias = std::vector<BiasType>(groups * group_output_channels);
    operator_output = std::vector<T>(batch_size * output_height * output_width * groups * group_output_channels);
    subgraph_output = std::vector<T>(batch_size * output_height * output_width * groups * group_output_channels);
  }

  std::unique_ptr<std::random_device> random_device;
  std::mt19937 rng;
  std::uniform_int_distribution<uint32_t> input_size_dist;
  std::uniform_int_distribution<uint32_t> kernel_size_dist;
  std::uniform_int_distribution<uint32_t> stride_dist;
  std::uniform_int_distribution<int32_t> i32dist;
  std::uniform_real_distribution<float> f32dist;
  std::uniform_real_distribution<float> scale_dist;

  const uint32_t input_padding_top = 0;
  const uint32_t input_padding_right = 0;
  const uint32_t input_padding_bottom = 0;
  const uint32_t input_padding_left = 0;
  uint32_t batch_size;
  uint32_t input_height;
  uint32_t input_width;
  uint32_t kernel_height;
  uint32_t kernel_width;
  uint32_t subsampling_height;
  uint32_t subsampling_width;
  uint32_t dilation_height;
  uint32_t dilation_width;
  uint32_t groups;
  uint32_t group_input_channels;
  uint32_t group_output_channels;
  float output_min;
  float output_max;
  uint32_t output_height;
  uint32_t output_width;

  std::array<size_t, 4> input_dims;
  std::array<size_t, 4> filter_dims;
  std::array<size_t, 1> bias_dims;
  std::array<size_t, 4> output_dims;

  std::vector<T> input;
  std::vector<T> filter;
  std::vector<BiasType> bias;
  std::vector<T> operator_output;
  std::vector<T> subgraph_output;
};

template <class T> class QuantizedConvolutionTestBase : public ConvolutionTestBase<T, int32_t> {
protected:
  QuantizedConvolutionTestBase()
  {
    i8dist = std::uniform_int_distribution<int32_t>(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    w8dist = std::uniform_int_distribution<int32_t>(-std::numeric_limits<T>::max(), std::numeric_limits<T>::max());
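    // w8dist is symmetric; for int8_t weights this excludes -128, a common
    // precaution so that negating a weight cannot overflow.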
    u8dist = std::uniform_int_distribution<int32_t>(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
    accumulators = std::vector<int32_t>(
      this->batch_size * this->output_height * this->output_width * this->groups * this->group_output_channels);
  }

  std::uniform_int_distribution<int32_t> i8dist;
  std::uniform_int_distribution<int32_t> u8dist;
  std::uniform_int_distribution<int32_t> w8dist;
  std::vector<int32_t> accumulators;
};

using ConvolutionTestQC8 = QuantizedConvolutionTestBase<int8_t>;
using ConvolutionTestQS8 = QuantizedConvolutionTestBase<int8_t>;
using ConvolutionTestQU8 = QuantizedConvolutionTestBase<uint8_t>;
using ConvolutionTestF32 = ConvolutionTestBase<float>;

TEST_F(ConvolutionTestQC8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
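  // Wrap the raw subgraph in a unique_ptr so it is freed even if an assertion
  // below aborts the test early.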
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

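  // QC8 quantizes filter and bias per output channel; all scales are 1.0f
  // here because this test only checks how the node is recorded.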
  std::vector<float> scale(groups * group_output_channels, 1.0f);
  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_channelwise_quantized_tensor_value(
      subgraph, xnn_datatype_qcint8, scale.data(), filter_dims.size(), 0, filter_dims.data(), filter.data(),
      /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_channelwise_quantized_tensor_value(
      subgraph, xnn_datatype_qcint32, scale.data(), bias_dims.size(), 0, bias_dims.data(), bias.data(),
      /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qc8);
  ASSERT_EQ(node->params.convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.convolution_2d.groups, groups);
  ASSERT_EQ(node->params.convolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.convolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(ConvolutionTestQS8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, filter_dims.size(), filter_dims.data(), filter.data(),
                          /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qs8);
  ASSERT_EQ(node->params.convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.convolution_2d.groups, groups);
  ASSERT_EQ(node->params.convolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.convolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(ConvolutionTestQU8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, filter_dims.size(), filter_dims.data(), filter.data(),
                          /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
  ASSERT_EQ(node->params.convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.convolution_2d.groups, groups);
  ASSERT_EQ(node->params.convolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.convolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(ConvolutionTestF32, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_tensor_value(
      subgraph, xnn_datatype_fp32, filter_dims.size(), filter_dims.data(), filter.data(), /*external_id=*/1,
      /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_convolution_2d);
  ASSERT_EQ(node->compute_type, xnn_compute_type_fp32);
  ASSERT_EQ(node->params.convolution_2d.input_padding_top, input_padding_top);
  ASSERT_EQ(node->params.convolution_2d.input_padding_right, input_padding_right);
  ASSERT_EQ(node->params.convolution_2d.input_padding_bottom, input_padding_bottom);
  ASSERT_EQ(node->params.convolution_2d.input_padding_left, input_padding_left);
  ASSERT_EQ(node->params.convolution_2d.kernel_height, kernel_height);
  ASSERT_EQ(node->params.convolution_2d.kernel_width, kernel_width);
  ASSERT_EQ(node->params.convolution_2d.subsampling_height, subsampling_height);
  ASSERT_EQ(node->params.convolution_2d.subsampling_width, subsampling_width);
  ASSERT_EQ(node->params.convolution_2d.dilation_height, dilation_height);
  ASSERT_EQ(node->params.convolution_2d.dilation_width, dilation_width);
  ASSERT_EQ(node->params.convolution_2d.groups, groups);
  ASSERT_EQ(node->params.convolution_2d.group_input_channels, group_input_channels);
  ASSERT_EQ(node->params.convolution_2d.group_output_channels, group_output_channels);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], filter_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(ConvolutionTestQC8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
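  // Prefill both outputs with a sentinel so elements that the kernels fail to
  // write stand out when the outputs are compared at the end.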
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));
  std::vector<float> requantization_scales(groups * group_output_channels);
  const int8_t input_zero_point = i8dist(rng);
  const int8_t output_zero_point = i8dist(rng);
  const float input_scale = scale_dist(rng);
  const float output_scale = scale_dist(rng);
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);

  compute_convolution_qs8_reference_results(
    batch_size,
    output_height,
    output_width,
    input_height,
    input_width,
    input_padding_top,
    input_padding_right,
    input_padding_bottom,
    input_padding_left,
    kernel_height,
    kernel_width,
    subsampling_height,
    subsampling_width,
    dilation_height,
    dilation_width,
    groups,
    group_input_channels,
    group_output_channels,
    input_zero_point,
    input,
    filter,
    accumulators,
    /*has_bias=*/true,
    bias);

  // Compute renormalization parameters.
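  // For each output channel, pick the largest scale that still maps the
  // channel's extreme accumulators into the int8 range around
  // output_zero_point, then clamp it below 1.0f (0x1.FFFFFEp-1f is the
  // largest float less than 1.0), matching the range XNNPACK's
  // requantization accepts.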
  for (size_t c = 0; c < groups * group_output_channels; c++) {
    int32_t accumulated_min = accumulators[c];
    int32_t accumulated_max = accumulators[c];
    for (size_t px = 0; px < batch_size * output_height * output_width; px++) {
      accumulated_min = std::min(accumulated_min, accumulators[px * groups * group_output_channels + c]);
      accumulated_max = std::max(accumulated_max, accumulators[px * groups * group_output_channels + c]);
    }

    float requantization_scale = 0x1.0p-32f;
    if (accumulated_max != 0) {
      requantization_scale = std::max(
        requantization_scale,
        float(int32_t(std::numeric_limits<int8_t>::max()) - int32_t(output_zero_point)) / float(accumulated_max));
    }
    if (accumulated_min != 0) {
      requantization_scale = std::max(
        requantization_scale,
        float(int32_t(std::numeric_limits<int8_t>::min()) - int32_t(output_zero_point)) / float(accumulated_min));
    }
    requantization_scale = std::min(requantization_scale, 0x1.FFFFFEp-1f);

    requantization_scales[c] = requantization_scale;
  }

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_qc8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width, groups, group_input_channels,
    group_output_channels, groups * group_input_channels, groups * group_output_channels, input_zero_point, input_scale,
    requantization_scales.data(), filter.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
    quantized_output_max, /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qc8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_channelwise_quantized_tensor_value(
                          subgraph, xnn_datatype_qcint8, requantization_scales.data(), filter_dims.size(), 0,
                          filter_dims.data(), filter.data(), /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_channelwise_quantized_tensor_value(
                          subgraph, xnn_datatype_qcint32, requantization_scales.data(), bias_dims.size(), 0,
                          bias_dims.data(), bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
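  // Bind the application-owned input and output buffers to the external
  // tensor IDs; all other tensors are managed by the runtime itself.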
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}

TEST_F(ConvolutionTestQS8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));
  const int8_t input_zero_point = -1;
  const float input_scale = scale_dist(rng);
  const float kernel_scale = scale_dist(rng);

  compute_convolution_qs8_reference_results(
    batch_size,
    output_height,
    output_width,
    input_height,
    input_width,
    input_padding_top,
    input_padding_right,
    input_padding_bottom,
    input_padding_left,
    kernel_height,
    kernel_width,
    subsampling_height,
    subsampling_width,
    dilation_height,
    dilation_width,
    groups,
    group_input_channels,
    group_output_channels,
    input_zero_point,
    input,
    filter,
    accumulators,
    /*has_bias=*/true,
    bias);

  // Compute renormalization parameters.
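  // Spread the accumulator range over the 256 representable int8 levels and
  // center it with the zero point, so the quantized output exercises the full
  // output range.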
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const int8_t output_zero_point = int8_t(std::max(
    std::min(
      lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<int8_t>::max())),
    long(std::numeric_limits<int8_t>::min())));
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_qs8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width, groups, group_input_channels,
    group_output_channels, groups * group_input_channels, groups * group_output_channels, input_zero_point, input_scale,
    kernel_scale, filter.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
    quantized_output_max, /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qs8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
                          bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}

TEST_F(ConvolutionTestQU8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return u8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0xA5));
  const uint8_t input_zero_point = u8dist(rng);
  const uint8_t kernel_zero_point = 0;
  const float input_scale = scale_dist(rng);
  const float kernel_scale = scale_dist(rng);

  // Compute reference results, without renormalization.
  compute_convolution_qu8_reference_results(
    batch_size,
    output_height,
    output_width,
    input_height,
    input_width,
    input_padding_top,
    input_padding_right,
    input_padding_bottom,
    input_padding_left,
    kernel_height,
    kernel_width,
    subsampling_height,
    subsampling_width,
    dilation_height,
    dilation_width,
    groups,
    group_input_channels,
    group_output_channels,
    input_zero_point,
    kernel_zero_point,
    input,
    filter,
    accumulators,
    /*has_bias=*/true,
    bias);

  // Compute renormalization parameters.
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const uint8_t output_zero_point = uint8_t(std::max(
    std::min(
      lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<uint8_t>::max())),
    long(std::numeric_limits<uint8_t>::min())));
  const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point);
  const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_qu8(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width, groups, group_input_channels,
    group_output_channels, groups * group_input_channels, groups * group_output_channels, input_zero_point, input_scale,
    kernel_zero_point, kernel_scale, filter.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
    quantized_output_max, /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_qu8(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, kernel_scale, filter_dims.size(), filter_dims.data(),
                          filter.data(), /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
                          bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}

TEST_F(ConvolutionTestF32, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
  std::generate(filter.begin(), filter.end(), [&]() { return f32dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return f32dist(rng); });
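  // For F32 the sentinel is NaN: any element left unwritten will fail the
  // exact comparison at the end of the test.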
  std::fill(operator_output.begin(), operator_output.end(), nanf(""));
  std::fill(subgraph_output.begin(), subgraph_output.end(), nanf(""));

  // Call operator API.
  const xnn_status status = xnn_create_convolution2d_nhwc_f32(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height, kernel_width,
    subsampling_height, subsampling_width, dilation_height, dilation_width, groups, group_input_channels,
    group_output_channels, groups * group_input_channels, groups * group_output_channels, filter.data(), bias.data(),
    output_min, output_max,
    /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_convolution2d_nhwc_f32(
                          op, batch_size, input_height, input_width, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t filter_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, filter_dims.size(), filter_dims.data(), filter.data(),
                          /*external_id=*/1, /*flags=*/0, &filter_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_convolution_2d(
      subgraph, input_padding_top, input_padding_right, input_padding_bottom, input_padding_left, kernel_height,
      kernel_width, subsampling_height, subsampling_width, dilation_height, dilation_width, groups,
      group_input_channels, group_output_channels, output_min, output_max, input_id, filter_id, bias_id, output_id,
      /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}
} // namespace xnnpack