// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <array>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE
#include "bench/utils.h"

#ifndef XNN_NO_QU8_OPERATORS
void xnnpack_deconvolution_qu8(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t adjustment = state.range(7);
  const size_t stride_height = state.range(8);
  const size_t stride_width = state.range(9);
  const size_t dilation = state.range(10);
  const size_t input_channels = state.range(11);
  const size_t output_channels = state.range(12);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
  auto u8rng = std::bind(
    std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()),
    std::ref(rng));

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
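  // Transposed-convolution output size: O = max(S * (I - 1) + A + EK, P) - P, where S is the
  // stride, A the output adjustment, EK the effective (dilated) kernel extent, and P the total padding.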
  const size_t output_height = std::max(stride_height * (input_height - 1) + adjustment + effective_kernel_height, padding_height) - padding_height;
  const size_t output_width = std::max(stride_width * (input_width - 1) + adjustment + effective_kernel_width, padding_width) - padding_width;

  std::vector<uint8_t> input(batch_size * input_height * input_width * input_channels);
  std::generate(input.begin(), input.end(), std::ref(u8rng));
  std::vector<uint8_t> kernel(output_channels * kernel_height * kernel_width * input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
  std::vector<int32_t> bias(output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(i32rng));
  const size_t output_elements = batch_size * output_height * output_width * output_channels;

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

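  // Rotate among enough copies of the operator and output buffers that their combined footprint
  // exceeds the last-level cache, so each timed run starts from a cold cache.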
  const size_t num_buffers = 1 +
    benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
      sizeof(float) * (kernel.size() + bias.size() + output_elements));
  std::vector<uint8_t> output(output_elements * num_buffers);

  std::vector<xnn_operator_t> deconvolution_operators(num_buffers);
  for (xnn_operator_t& deconvolution_op : deconvolution_operators) {
    status = xnn_create_deconvolution2d_nhwc_qu8(
      padding_top, padding_right, padding_bottom, padding_left,
      kernel_height, kernel_width,
      stride_height, stride_width,
      dilation, dilation,
      /*groups=*/1, input_channels, output_channels,
      /*input_pixel_stride=*/input_channels, /*output_pixel_stride=*/output_channels,
      127 /* input zero point */, 0.5f /* input scale */,
      127 /* kernel zero point */, 0.5f /* kernel scale */,
      kernel.data(), bias.data(),
      127 /* output zero point */, 0.5f /* output scale */,
      0 /* output min */, 255 /* output max */,
      0 /* flags */,
      nullptr,
      &deconvolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to create QU8 Deconvolution operator");
      return;
    }
  }

  for (size_t i = 0; i < deconvolution_operators.size(); i++) {
    status = xnn_setup_deconvolution2d_nhwc_qu8(
      deconvolution_operators[i],
      batch_size, input_height, input_width,
      adjustment /* height adjustment */, adjustment /* width adjustment */,
      input.data(), output.data() + i * output_elements,
      nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to setup QU8 Deconvolution operator");
      return;
    }
  }

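  // On each iteration, rotate to the next pre-created operator so that its packed weights and
  // output buffer are cold in cache; the input is prefetched back into L1 outside the timed region.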
  size_t buffer_index = 0;
  for (auto _ : state) {
    state.PauseTiming();
    benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint8_t));
    buffer_index = (buffer_index + 1) % num_buffers;
    state.ResumeTiming();

    status = xnn_run_operator(deconvolution_operators[buffer_index], nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run QU8 Deconvolution operator");
      return;
    }
  }

  for (xnn_operator_t& deconvolution_op : deconvolution_operators) {
    status = xnn_delete_operator(deconvolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to delete QU8 Deconvolution operator");
      return;
    }
    deconvolution_op = nullptr;
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

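  // Two ops (multiply + accumulate) per kernel element, per input pixel, per in/out channel pair.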
  state.counters["OPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * input_height * input_width *
      input_channels * output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}
#endif  // XNN_NO_QU8_OPERATORS

void xnnpack_deconvolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t adjustment = state.range(7);
  const size_t stride_height = state.range(8);
  const size_t stride_width = state.range(9);
  const size_t dilation = state.range(10);
  const size_t input_channels = state.range(11);
  const size_t output_channels = state.range(12);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));

  const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
  const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
  const size_t padding_left = padding_width / 2;
  const size_t padding_top = padding_height / 2;
  const size_t padding_right = padding_width - padding_left;
  const size_t padding_bottom = padding_height - padding_top;
  const size_t output_height = std::max(stride_height * (input_height - 1) + adjustment + effective_kernel_height, padding_height) - padding_height;
  const size_t output_width = std::max(stride_width * (input_width - 1) + adjustment + effective_kernel_width, padding_width) - padding_width;

  std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
    batch_size * input_height * input_width * input_channels);
  std::generate(input.begin(), input.end(), std::ref(f32rng));
  std::vector<float> kernel(output_channels * kernel_height * kernel_width * input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
  std::vector<float> bias(output_channels);
  std::generate(bias.begin(), bias.end(), std::ref(f32rng));
  const size_t output_elements = batch_size * output_height * output_width * output_channels;

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  const size_t num_buffers = 1 +
    benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
      sizeof(float) * (kernel.size() + bias.size() + output_elements));
  std::vector<float> output(output_elements * num_buffers);

  std::vector<xnn_operator_t> deconvolution_operators(num_buffers);
  for (xnn_operator_t& deconvolution_op : deconvolution_operators) {
    status = xnn_create_deconvolution2d_nhwc_f32(
      padding_top, padding_right, padding_bottom, padding_left,
      kernel_height, kernel_width,
      stride_height, stride_width,
      dilation, dilation,
      /*groups=*/1, input_channels, output_channels,
      /*input_pixel_stride=*/input_channels, /*output_pixel_stride=*/output_channels,
      kernel.data(), bias.data(),
      -std::numeric_limits<float>::infinity() /* output min */,
      +std::numeric_limits<float>::infinity() /* output max */,
      0 /* flags */,
      nullptr,
      &deconvolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to create FP32 Deconvolution operator");
      return;
    }
  }

  for (size_t i = 0; i < deconvolution_operators.size(); i++) {
    status = xnn_setup_deconvolution2d_nhwc_f32(
      deconvolution_operators[i],
      batch_size, input_height, input_width,
      adjustment /* height adjustment */, adjustment /* width adjustment */,
      input.data(), output.data() + i * output_elements,
      nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to setup FP32 Deconvolution operator");
      return;
    }
  }

  size_t buffer_index = 0;
  for (auto _ : state) {
    state.PauseTiming();
    benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
    buffer_index = (buffer_index + 1) % num_buffers;
    state.ResumeTiming();

    status = xnn_run_operator(deconvolution_operators[buffer_index], nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run FP32 Deconvolution operator");
      return;
    }
  }

  for (xnn_operator_t& deconvolution_op : deconvolution_operators) {
    status = xnn_delete_operator(deconvolution_op);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to delete FP32 Deconvolution operator");
      return;
    }
    deconvolution_op = nullptr;
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * input_height * input_width *
      input_channels * output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);
}

#ifdef BENCHMARK_TENSORFLOW_LITE
void tflite_deconvolution_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t input_height = state.range(1);
  const size_t input_width = state.range(2);
  const size_t kernel_height = state.range(3);
  const size_t kernel_width = state.range(4);
  const size_t padding_height = state.range(5);
  const size_t padding_width = state.range(6);
  const size_t adjustment = state.range(7);
  const size_t stride_height = state.range(8);
  const size_t stride_width = state.range(9);
  const size_t dilation = state.range(10);
  const size_t input_channels = state.range(11);
  const size_t output_channels = state.range(12);

  if (dilation != 1) {
    state.SkipWithError("dilated deconvolution is not supported");
    return;
  }

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));

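  // TFLite TRANSPOSE_CONV supports only SAME and VALID padding; map the benchmark's explicit
  // padding onto one of these and skip parameter combinations that match neither.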
  tflite::Padding tf_padding = tflite::Padding_VALID;
  if (padding_width == kernel_width - stride_width && padding_height == kernel_height - stride_height) {
    tf_padding = tflite::Padding_SAME;
  } else if (padding_width == 0 && padding_height == 0) {
    tf_padding = tflite::Padding_VALID;
  } else {
    state.SkipWithError("unsupported padding");
    return;
  }

  const size_t output_height = std::max(stride_height * (input_height - 1) + adjustment + kernel_height, padding_height) - padding_height;
  const size_t output_width = std::max(stride_width * (input_width - 1) + adjustment + kernel_width, padding_width) - padding_width;

  std::vector<float> kernel(output_channels * kernel_height * kernel_width * input_channels);
  std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));

  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_TRANSPOSE_CONV, 0);

  flatbuffers::Offset<tflite::TransposeConvOptions> transpose_conv_options = CreateTransposeConvOptions(
    builder,
    tf_padding,
    static_cast<int32_t>(stride_width), static_cast<int32_t>(stride_height));

  const std::array<int32_t, 4> input_shape{{
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(input_height),
    static_cast<int32_t>(input_width),
    static_cast<int32_t>(input_channels)
  }};
  const std::array<int32_t, 4> output_shape{{
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(output_height),
    static_cast<int32_t>(output_width),
    static_cast<int32_t>(output_channels)
  }};
  const std::array<int32_t, 4> filter_shape{{
    static_cast<int32_t>(output_channels),
    static_cast<int32_t>(kernel_height),
    static_cast<int32_t>(kernel_width),
    static_cast<int32_t>(input_channels)
  }};
  const std::array<int32_t, 1> output_shape_shape{{ 4 }};

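  // Buffer 0 is the empty sentinel buffer that the TFLite schema requires; buffer 1 holds the
  // filter weights and buffer 2 the static output-shape tensor consumed by TRANSPOSE_CONV.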
  const std::array<flatbuffers::Offset<tflite::Buffer>, 3> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(kernel.data()),
      sizeof(float) * kernel.size())),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(output_shape.data()),
      sizeof(int32_t) * output_shape.size())),
  }};

  const std::array<flatbuffers::Offset<tflite::Tensor>, 4> tensors{{
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(output_shape_shape.data(), output_shape_shape.size()),
      tflite::TensorType_INT32,
      2 /* buffer id */),
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
      tflite::TensorType_FLOAT32,
      1 /* buffer id */),
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(input_shape.data(), input_shape.size()),
      tflite::TensorType_FLOAT32),
    tflite::CreateTensor(builder,
      builder.CreateVector<int32_t>(output_shape.data(), output_shape.size()),
      tflite::TensorType_FLOAT32),
  }};

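  // TRANSPOSE_CONV takes its inputs in the order (output shape, filter, input) = tensors 0, 1, 2
  // and writes its result to tensor 3.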
  const std::array<int32_t, 3> op_inputs{{ 0, 1, 2 }};
  const std::array<int32_t, 1> op_outputs{{ 3 }};
  flatbuffers::Offset<tflite::Operator> op = CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
    tflite::BuiltinOptions_TransposeConvOptions,
    transpose_conv_options.Union());

  const std::array<int32_t, 1> graph_inputs{{ 2 }};
  const std::array<int32_t, 1> graph_outputs{{ 3 }};
  flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1),
    builder.CreateString("TransposeConv subgraph"));

  const flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("TransposeConv model");

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    description,
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

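  // Resolve builtin kernels without the default delegates, so the TFLite reference implementation
  // is measured rather than a delegate (e.g. XNNPACK) taking over the graph.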
  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (interpreter == nullptr) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

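  // Tensor 2 is the graph input; fill it with random data once, outside the timed loop.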
  std::generate(
    interpreter->typed_tensor<float>(2),
    interpreter->typed_tensor<float>(2) + batch_size * input_channels * input_height * input_width,
    std::ref(f32rng));

  for (auto _ : state) {
    state.PauseTiming();
    benchmark::utils::WipeCache();
    benchmark::utils::PrefetchToL1(
      interpreter->typed_tensor<float>(2),
      batch_size * input_channels * input_height * input_width * sizeof(float));
    state.ResumeTiming();

    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  state.counters["FLOPS"] = benchmark::Counter(
    uint64_t(state.iterations()) * 2 *
      batch_size * input_height * input_width *
      input_channels * output_channels *
      kernel_height * kernel_width,
    benchmark::Counter::kIsRate);

  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

// FCN-32 model (PASCAL VOC version).
// We assume a CIF image (352x288) at the model input/output.
static void FCN32(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "A", "SH", "SW", "D", "Cin", "Cout"});

  /*       N   H   W  KH  KW  PH  PW  A  SH  SW  D  Cin  Cout */
  b->Args({1,  9, 11, 64, 64,  0,  0, 0, 32, 32, 1,  21,  21});
}

// FCN-16 model (PASCAL VOC version).
// We assume a CIF image (352x288) at the model input/output.
static void FCN16(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "A", "SH", "SW", "D", "Cin", "Cout"});

  /*       N   H   W  KH  KW  PH  PW  A  SH  SW  D  Cin  Cout */
  b->Args({1,  9, 11,  4,  4,  0,  0, 0,  2,  2, 1,  21,  21});
  b->Args({1, 18, 22, 32, 32,  0,  0, 0, 16, 16, 1,  21,  21});
}

// FCN-8 model (PASCAL VOC version).
// We assume a CIF image (352x288) at the model input/output.
static void FCN8(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "A", "SH", "SW", "D", "Cin", "Cout"});

  /*       N   H   W  KH  KW  PH  PW  A  SH  SW  D  Cin  Cout */
  b->Args({1,  9, 11,  4,  4,  0,  0, 0,  2,  2, 1,  21,  21});
  b->Args({1, 18, 22,  4,  4,  0,  0, 0,  2,  2, 1,  21,  21});
  b->Args({1, 36, 44, 16, 16,  0,  0, 0,  8,  8, 1,  21,  21});
}

static void ENet(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "A", "SH", "SW", "D", "Cin", "Cout"});

  /*********************** Bottleneck 4.0 ************************/
  /*       N    H    W  KH  KW  PH  PW  A  SH  SW  D  Cin  Cout */
  b->Args({1,  64,  64,  3,  3,  2,  2, 1,  2,  2, 1,  32,  32});
  /*********************** Bottleneck 5.0 ************************/
  /*       N    H    W  KH  KW  PH  PW  A  SH  SW  D  Cin  Cout */
  b->Args({1, 128, 128,  3,  3,  2,  2, 1,  2,  2, 1,  16,  16});
  /******************* Final Full Convolution ********************/
  /*       N    H    W  KH  KW  PH  PW  A  SH  SW  D  Cin  Cout */
  b->Args({1, 256, 256,  2,  2,  0,  0, 0,  2,  2, 1,  16,  12});
}

static void ESPNet(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "A", "SH", "SW", "D", "Cin", "Cout"});

  /*       N    H    W  KH  KW  PH  PW  A  SH  SW  D  Cin  Cout */
  b->Args({1,  64, 128,  2,  2,  0,  0, 0,  2,  2, 1,  20,  20});
  b->Args({1, 128, 256,  2,  2,  0,  0, 0,  2,  2, 1,  20,  20});
  b->Args({1, 256, 512,  2,  2,  0,  0, 0,  2,  2, 1,  20,  20});
}

BENCHMARK_CAPTURE(xnnpack_deconvolution_f32, fcn32, "FCN-32")
  ->Apply(FCN32)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_f32, fcn16, "FCN-16")
  ->Apply(FCN16)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_f32, fcn8, "FCN-8")
  ->Apply(FCN8)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_f32, enet, "ENet")
  ->Apply(ENet)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_f32, espnet, "ESPNet")
  ->Apply(ESPNet)
  ->UseRealTime();

#ifndef XNN_NO_QU8_OPERATORS
BENCHMARK_CAPTURE(xnnpack_deconvolution_qu8, fcn32, "FCN-32")
  ->Apply(FCN32)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_qu8, fcn16, "FCN-16")
  ->Apply(FCN16)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_qu8, fcn8, "FCN-8")
  ->Apply(FCN8)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_qu8, enet, "ENet")
  ->Apply(ENet)
  ->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_deconvolution_qu8, espnet, "ESPNet")
  ->Apply(ESPNet)
  ->UseRealTime();
#endif  // XNN_NO_QU8_OPERATORS

#ifdef BENCHMARK_TENSORFLOW_LITE
BENCHMARK_CAPTURE(tflite_deconvolution_f32, fcn32, "FCN-32")
  ->Apply(FCN32)
  ->UseRealTime();
BENCHMARK_CAPTURE(tflite_deconvolution_f32, fcn16, "FCN-16")
  ->Apply(FCN16)
  ->UseRealTime();
BENCHMARK_CAPTURE(tflite_deconvolution_f32, fcn8, "FCN-8")
  ->Apply(FCN8)
  ->UseRealTime();
BENCHMARK_CAPTURE(tflite_deconvolution_f32, enet, "ENet")
  ->Apply(ENet)
  ->UseRealTime();
BENCHMARK_CAPTURE(tflite_deconvolution_f32, espnet, "ESPNet")
  ->Apply(ESPNet)
  ->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
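// A typical invocation of the resulting benchmark binary (the binary name is an assumption;
// --benchmark_filter is the standard Google Benchmark flag for selecting registered benchmarks):
//   ./deconvolution_bench --benchmark_filter=xnnpack_deconvolution_f32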