// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm> // For std::generate, std::min.
#include <array> // For std::array.
#include <cassert> // For assert.
#include <cmath> // For lrint, nanf.
#include <cstddef> // For size_t.
#include <cstdint> // For uint32_t.
#include <limits> // For std::numeric_limits.
#include <memory> // For std::unique_ptr.
#include <numeric> // For std::accumulate.
#include <random> // For std::random_device, std::mt19937, std::uniform_real_distribution.
#include <vector> // For std::vector.

#include <xnnpack.h>
#include <xnnpack/operator.h>
#include <xnnpack/requantization.h>
#include <xnnpack/subgraph.h>

#include <gtest/gtest.h>

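// Test fixture shared by the fully-connected subgraph tests. It generates a random input shape,
// derives matching kernel/bias/output shapes, and allocates buffers for both the operator-API
// and subgraph-API outputs so the two code paths can be compared element by element.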
template <class T, class BiasType = T> class FullyConnectedTestBase : public ::testing::Test {
protected:
  FullyConnectedTestBase()
  {
    random_device = std::unique_ptr<std::random_device>(new std::random_device());
    rng = std::mt19937((*random_device)());
    input_size_dist = std::uniform_int_distribution<uint32_t>(10, 15);
    kernel_size_dist = std::uniform_int_distribution<uint32_t>(1, 5);
    stride_dist = std::uniform_int_distribution<uint32_t>(1, 2);
    f32dist = std::uniform_real_distribution<float>(0.1f, 1.0f);
    scale_dist = std::uniform_real_distribution<float>(1.0f, 5.0f);
    i32dist = std::uniform_int_distribution<int32_t>(-10000, 10000);
    auto shape_dist = std::uniform_int_distribution<size_t>(2, XNN_MAX_TENSOR_DIMS);
    dim_dist = std::uniform_int_distribution<size_t>(5, 15);
    i8dist =
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
    u8dist =
      std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
    w8dist =
      std::uniform_int_distribution<int32_t>(-std::numeric_limits<uint8_t>::max(), std::numeric_limits<uint8_t>::max());

    output_min = -std::numeric_limits<float>::infinity();
    output_max = std::numeric_limits<float>::infinity();

    size_t num_input_dims = shape_dist(rng);
    input_dims = RandomShape(num_input_dims);
    assert(input_dims.size() >= 2);
    output_channels = dim_dist(rng);
    input_channels = input_dims.back();
    kernel_dims = {output_channels, input_channels};
    bias_dims = {output_channels};
    output_dims = input_dims;
    output_dims[output_dims.size() - 1] = output_channels;

    batch_size = NumElements(input_dims) / input_channels;

    input = std::vector<T>(XNN_EXTRA_BYTES / sizeof(T) + NumElements(input_dims));
    kernel = std::vector<T>(input_channels * output_channels);
    bias = std::vector<BiasType>(output_channels);
    operator_output = std::vector<T>(NumElements(output_dims));
    subgraph_output = std::vector<T>(operator_output.size());
    accumulators = std::vector<int32_t>(batch_size * output_channels);
  }

  std::vector<size_t> RandomShape(size_t num_dims)
  {
    std::vector<size_t> dims(num_dims);
    std::generate(dims.begin(), dims.end(), [&] { return dim_dist(rng); });
    return dims;
  }

  size_t NumElements(std::vector<size_t>& dims)
  {
    return std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>());
  }

  std::unique_ptr<std::random_device> random_device;
  std::mt19937 rng;
  std::uniform_int_distribution<uint32_t> input_size_dist;
  std::uniform_int_distribution<uint32_t> kernel_size_dist;
  std::uniform_int_distribution<uint32_t> stride_dist;
  std::uniform_int_distribution<int32_t> i32dist;
  std::uniform_real_distribution<float> f32dist;
  std::uniform_real_distribution<float> scale_dist;
  std::uniform_int_distribution<size_t> dim_dist;
  std::uniform_int_distribution<int32_t> i8dist;
  std::uniform_int_distribution<int32_t> u8dist;
  std::uniform_int_distribution<int32_t> w8dist;

  uint32_t batch_size;
  size_t input_channels;
  size_t output_channels;

  float output_min;
  float output_max;

  std::vector<size_t> input_dims;
  std::vector<size_t> kernel_dims;
  std::vector<size_t> bias_dims;
  std::vector<size_t> output_dims;

  std::vector<T> input;
  std::vector<T> kernel;
  std::vector<BiasType> bias;
  std::vector<T> operator_output;
  std::vector<T> subgraph_output;
  std::vector<int32_t> accumulators;
};

template <class T> class QuantizedFullyConnectedTestBase : public FullyConnectedTestBase<T, int32_t> {
protected:
  void initialize_accumulators_from_bias()
  {
    for (size_t i = 0; i < this->batch_size; i++) {
      for (size_t oc = 0; oc < this->output_channels; oc++) {
        this->accumulators[i * this->output_channels + oc] = this->bias[oc];
      }
    }
  }
};

using FullyConnectedTestQS8 = QuantizedFullyConnectedTestBase<int8_t>;
using FullyConnectedTestQU8 = QuantizedFullyConnectedTestBase<uint8_t>;
using FullyConnectedTestF32 = FullyConnectedTestBase<float>;

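// The define tests below build a subgraph with a single fully-connected node and verify that
// xnn_define_fully_connected records the expected node type, compute type, clamping range,
// inputs, outputs, and flags.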
TEST_F(FullyConnectedTestQS8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_fully_connected(subgraph, output_min, output_max, input_id, kernel_id, bias_id, output_id, /*flags=*/0));

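  // The node recorded in the subgraph should describe exactly the fully-connected operation defined above.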
  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_fully_connected);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qs8);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(FullyConnectedTestQU8, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, 1.0f, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, 1.0f, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success, xnn_define_fully_connected(
                          subgraph, output_min, output_max, input_id, kernel_id, bias_id, output_id,
                          /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_fully_connected);
  ASSERT_EQ(node->compute_type, xnn_compute_type_qu8);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

TEST_F(FullyConnectedTestF32, define)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, /*flags=*/0, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_tensor_value(
      subgraph, xnn_datatype_fp32, kernel_dims.size(), kernel_dims.data(), kernel.data(), /*external_id=*/1,
      /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, /*flags=*/0, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);

  ASSERT_EQ(
    xnn_status_success,
    xnn_define_fully_connected(subgraph, output_min, output_max, input_id, kernel_id, bias_id, output_id, /*flags=*/0));

  ASSERT_EQ(subgraph->num_nodes, 1);
  const struct xnn_node* node = &subgraph->nodes[0];
  ASSERT_EQ(node->type, xnn_node_type_fully_connected);
  ASSERT_EQ(node->compute_type, xnn_compute_type_fp32);
  ASSERT_EQ(node->activation.output_min, output_min);
  ASSERT_EQ(node->activation.output_max, output_max);
  ASSERT_EQ(node->num_inputs, 3);
  ASSERT_EQ(node->inputs[0], input_id);
  ASSERT_EQ(node->inputs[1], kernel_id);
  ASSERT_EQ(node->inputs[2], bias_id);
  ASSERT_EQ(node->num_outputs, 1);
  ASSERT_EQ(node->outputs[0], output_id);
  ASSERT_EQ(node->flags, 0);
}

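// The matches_operator_api tests below run the same fully-connected computation twice, once
// through the operator API and once through a subgraph runtime, and require the two outputs to
// agree exactly.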
TEST_F(FullyConnectedTestQS8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
  std::generate(kernel.begin(), kernel.end(), [&]() { return w8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), INT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), INT8_C(0xA5));
  const int8_t input_zero_point = -1;
  const float input_scale = scale_dist(rng);
  const float kernel_scale = scale_dist(rng);

  // Compute reference results, without renormalization.
  initialize_accumulators_from_bias();
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oc = 0; oc < output_channels; oc++) {
      for (size_t ic = 0; ic < input_channels; ic++) {
        accumulators[i * output_channels + oc] +=
          (int32_t(input[i * input_channels + ic]) - int32_t(input_zero_point)) *
          int32_t(kernel[oc * input_channels + ic]);
      }
    }
  }

  // Compute renormalization parameters.
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

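  // Map the observed accumulator range onto the int8 output range: the scale spreads
  // (accumulated_max - accumulated_min) over 255 quantization steps, and the zero point is
  // chosen so the midpoint of the accumulator range lands near the middle of [-128, 127].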
  float output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  int8_t output_zero_point = int8_t(std::max(
    std::min(
      lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<int8_t>::max())),
    long(std::numeric_limits<int8_t>::min())));
  const int8_t quantized_output_min = xnn_qs8_quantize(output_min, output_scale, output_zero_point);
  const int8_t quantized_output_max = xnn_qs8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_fully_connected_nc_qs8(
    input_channels, output_channels, input_channels, output_channels, input_zero_point, input_scale, kernel_scale,
    kernel.data(), bias.data(), output_zero_point, output_scale, quantized_output_min, quantized_output_max,
    /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_fully_connected_nc_qs8(
                          op, batch_size, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, 0, kernel_scale, kernel_dims.size(), kernel_dims.data(),
                          kernel.data(), /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
                          bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_fully_connected(subgraph, output_min, output_max, input_id, kernel_id, bias_id, output_id, /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}

TEST_F(FullyConnectedTestQU8, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
  std::generate(kernel.begin(), kernel.end(), [&]() { return u8dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return i32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), UINT8_C(0xA5));
  std::fill(subgraph_output.begin(), subgraph_output.end(), UINT8_C(0xA5));
  const uint8_t input_zero_point = u8dist(rng);
  const uint8_t kernel_zero_point = 0;
  const float input_scale = scale_dist(rng);
  const float kernel_scale = scale_dist(rng);

  // Compute reference results, without renormalization.
  initialize_accumulators_from_bias();
  for (size_t i = 0; i < batch_size; i++) {
    for (size_t oc = 0; oc < output_channels; oc++) {
      for (size_t ic = 0; ic < input_channels; ic++) {
        accumulators[i * output_channels + oc] +=
          (int32_t(input[i * input_channels + ic]) - int32_t(input_zero_point)) *
          (int32_t(kernel[oc * input_channels + ic]) - int32_t(kernel_zero_point));
      }
    }
  }

  // Compute renormalization parameters.
  const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
  const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

  const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
  const uint8_t output_zero_point = uint8_t(std::max(
    std::min(
      lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<uint8_t>::max())),
    long(std::numeric_limits<uint8_t>::min())));
  const uint8_t quantized_output_min = xnn_qu8_quantize(output_min, output_scale, output_zero_point);
  const uint8_t quantized_output_max = xnn_qu8_quantize(output_max, output_scale, output_zero_point);

  // Call operator API.
  const xnn_status status = xnn_create_fully_connected_nc_qu8(
    input_channels, output_channels, input_channels, output_channels, input_zero_point, input_scale, kernel_zero_point,
    kernel_scale, kernel.data(), bias.data(), output_zero_point, output_scale, quantized_output_min,
    quantized_output_max, /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_fully_connected_nc_qu8(
                          op, batch_size, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, input_zero_point, input_scale, input_dims.size(),
                          input_dims.data(), nullptr, /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, 0, kernel_scale, kernel_dims.size(), kernel_dims.data(),
                          kernel.data(), /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_qint32, 0, kernel_scale, bias_dims.size(), bias_dims.data(),
                          bias.data(), /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_quantized_tensor_value(
                          subgraph, xnn_datatype_quint8, output_zero_point, output_scale, output_dims.size(),
                          output_dims.data(), nullptr, /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_fully_connected(subgraph, output_min, output_max, input_id, kernel_id, bias_id, output_id, /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}

TEST_F(FullyConnectedTestF32, matches_operator_api)
{
  ASSERT_EQ(xnn_status_success, xnn_initialize(/*allocator=*/nullptr));

  xnn_operator_t op = nullptr;

  std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
  std::generate(kernel.begin(), kernel.end(), [&]() { return f32dist(rng); });
  std::generate(bias.begin(), bias.end(), [&]() { return f32dist(rng); });
  std::fill(operator_output.begin(), operator_output.end(), nanf(""));
  std::fill(subgraph_output.begin(), subgraph_output.end(), nanf(""));

  // Call operator API.
  const xnn_status status = xnn_create_fully_connected_nc_f32(
    input_channels, output_channels, input_channels, output_channels, kernel.data(), bias.data(), output_min,
    output_max,
    /*flags=*/0, nullptr, &op);
  std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_op(op, xnn_delete_operator);

  if (status == xnn_status_unsupported_hardware) {
    GTEST_SKIP();
  }

  ASSERT_EQ(xnn_status_success, status);
  ASSERT_NE(nullptr, op);
  ASSERT_EQ(
    xnn_status_success, xnn_setup_fully_connected_nc_f32(
                          op, batch_size, input.data(), operator_output.data(),
                          /*threadpool=*/nullptr));

  ASSERT_EQ(xnn_status_success, xnn_run_operator(op, /*threadpool=*/nullptr));

  // Call subgraph API.
  xnn_subgraph_t subgraph = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_subgraph(4, /*flags=*/0, &subgraph));
  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> auto_subgraph(subgraph, xnn_delete_subgraph);

  uint32_t input_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, input_dims.size(), input_dims.data(), nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &input_id));
  ASSERT_NE(input_id, XNN_INVALID_NODE_ID);

  uint32_t kernel_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, kernel_dims.size(), kernel_dims.data(), kernel.data(),
                          /*external_id=*/1, /*flags=*/0, &kernel_id));

  uint32_t bias_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, bias_dims.size(), bias_dims.data(), bias.data(),
                          /*external_id=*/2, /*flags=*/0, &bias_id));

  uint32_t output_id = XNN_INVALID_NODE_ID;
  ASSERT_EQ(
    xnn_status_success, xnn_define_tensor_value(
                          subgraph, xnn_datatype_fp32, output_dims.size(), output_dims.data(), nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &output_id));
  ASSERT_NE(output_id, XNN_INVALID_NODE_ID);
  ASSERT_EQ(
    xnn_status_success,
    xnn_define_fully_connected(subgraph, output_min, output_max, input_id, kernel_id, bias_id, output_id, /*flags=*/0));

  xnn_runtime_t runtime = nullptr;
  ASSERT_EQ(xnn_status_success, xnn_create_runtime_v3(subgraph, nullptr, nullptr, /*flags=*/0, &runtime));
  ASSERT_NE(nullptr, runtime);
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(runtime, xnn_delete_runtime);
  std::array<xnn_external_value, 2> external = {
    xnn_external_value{input_id, input.data()}, xnn_external_value{output_id, subgraph_output.data()}};
  ASSERT_EQ(xnn_status_success, xnn_setup_runtime(runtime, external.size(), external.data()));
  ASSERT_EQ(xnn_status_success, xnn_invoke_runtime(runtime));

  // Check outputs match.
  for (size_t i = 0; i < operator_output.size(); i++) {
    ASSERT_EQ(subgraph_output[i], operator_output[i]);
  }
}