1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <algorithm>
10 #include <cfloat>
11 #include <cmath>
12 #include <functional>
13 #include <limits>
14 #include <ostream>
15 #include <random>
16 #include <string>
17 #include <vector>
18
19 #include <xnnpack.h>
20
21 #include <benchmark/benchmark.h>
22 #include <fp16.h>
23 #ifdef BENCHMARK_TENSORFLOW_LITE
24 #include "flatbuffers/include/flatbuffers/flatbuffers.h"
25 #include "tensorflow/lite/interpreter.h"
26 #include "tensorflow/lite/kernels/register.h"
27 #include "tensorflow/lite/model.h"
28 #include "tensorflow/lite/schema/schema_generated.h"
29 #include "tensorflow/lite/version.h"
30 #endif // BENCHMARK_TENSORFLOW_LITE
31 #include "bench/utils.h"
32
33 #ifndef XNN_NO_QU8_OPERATORS
34 void xnnpack_convolution_qu8(benchmark::State& state, const char* net) {
35 const size_t batch_size = state.range(0);
36 const size_t input_height = state.range(1);
37 const size_t input_width = state.range(2);
38 const size_t kernel_height = state.range(3);
39 const size_t kernel_width = state.range(4);
40 const size_t padding_height = state.range(5);
41 const size_t padding_width = state.range(6);
42 const size_t subsampling = state.range(7);
43 const size_t dilation = state.range(8);
44 const size_t groups = state.range(9);
45 const size_t group_input_channels = state.range(10);
46 const size_t group_output_channels = state.range(11);
47
48 std::random_device random_device;
49 auto rng = std::mt19937(random_device());
50 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
51 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
52
53 const size_t output_pixel_stride = groups * group_output_channels;
54 const size_t input_pixel_stride = groups * group_input_channels;
55 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
56 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
57 const size_t padding_left = padding_width / 2;
58 const size_t padding_top = padding_height / 2;
59 const size_t padding_right = padding_width - padding_left;
60 const size_t padding_bottom = padding_height - padding_top;
61 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
62 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
63
64 std::vector<uint8_t> input(batch_size * input_height * input_width * input_pixel_stride);
65 std::generate(input.begin(), input.end(), std::ref(u8rng));
66 std::vector<uint8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
67 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
68 std::vector<int32_t> bias(groups * group_output_channels);
69 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
70 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
71
72 xnn_status status = xnn_initialize(nullptr /* allocator */);
73 if (status != xnn_status_success) {
74 state.SkipWithError("failed to initialize XNNPACK");
75 return;
76 }
77
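  // Create enough operator instances (each with its own output buffer) that the combined
  // kernel, bias, and output footprint exceeds the last-level cache, so every timed
  // iteration starts from cold data.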
78 const size_t num_buffers = 1 +
79 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
80 sizeof(uint8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(uint8_t) * output_elements);
81 std::vector<uint8_t> output(output_elements * num_buffers);
82
83 std::vector<xnn_operator_t> convolution_operators(num_buffers);
84 for (xnn_operator_t& convolution_op : convolution_operators) {
85 status = xnn_create_convolution2d_nhwc_qu8(
86 padding_top, padding_right, padding_bottom, padding_left,
87 kernel_height, kernel_width,
88 subsampling, subsampling,
89 dilation, dilation,
90 groups, group_input_channels, group_output_channels,
91 input_pixel_stride, output_pixel_stride,
92 127, 0.5f,
93 127, 0.5f,
94 kernel.data(), bias.data(),
95 127, 0.5f, 0, 255,
96 0 /* flags */, NULL, &convolution_op);
97 if (status != xnn_status_success) {
98 state.SkipWithError("failed to create QUINT8 Convolution operator");
99 return;
100 }
101 }
102
103 for (size_t i = 0; i < convolution_operators.size(); i++) {
104 status = xnn_setup_convolution2d_nhwc_qu8(
105 convolution_operators[i],
106 batch_size, input_height, input_width,
107 input.data(), output.data() + i * output_elements,
108 nullptr /* thread pool */);
109 if (status != xnn_status_success) {
110 state.SkipWithError("failed to setup QUINT8 Convolution operator");
111 return;
112 }
113 }
114
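  // Rotate through the pre-created operators so each iteration touches a different output
  // buffer and packed-weight copy; the input is prefetched into L1 outside the timed region.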
115 size_t buffer_index = 0;
116 for (auto _ : state) {
117 state.PauseTiming();
118 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint8_t));
119 buffer_index = (buffer_index + 1) % num_buffers;
120 state.ResumeTiming();
121
122 status = xnn_run_operator(convolution_operators[buffer_index],
123 nullptr /* thread pool */);
124 if (status != xnn_status_success) {
125 state.SkipWithError("failed to run QUINT8 Convolution operator");
126 return;
127 }
128 }
129
130 for (xnn_operator_t& convolution_op : convolution_operators) {
131 status = xnn_delete_operator(convolution_op);
132 if (status != xnn_status_success) {
133 state.SkipWithError("failed to delete QUINT8 Convolution operator");
134 return;
135 }
136 convolution_op = nullptr;
137 }
138
139 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
140 if (cpu_frequency != 0) {
141 state.counters["cpufreq"] = cpu_frequency;
142 }
143
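  // Report throughput as 2 ops (multiply + add) per multiply-accumulate of the convolution.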
144 state.counters["OPS"] = benchmark::Counter(
145 uint64_t(state.iterations()) * 2 *
146 batch_size * output_height * output_width *
147 groups * group_input_channels * group_output_channels *
148 kernel_height * kernel_width,
149 benchmark::Counter::kIsRate);
150 }
151 #endif // XNN_NO_QU8_OPERATORS
152
153 #ifndef XNN_NO_QS8_OPERATORS
154 void xnnpack_convolution_qs8(benchmark::State& state, const char* net) {
155 const size_t batch_size = state.range(0);
156 const size_t input_height = state.range(1);
157 const size_t input_width = state.range(2);
158 const size_t kernel_height = state.range(3);
159 const size_t kernel_width = state.range(4);
160 const size_t padding_height = state.range(5);
161 const size_t padding_width = state.range(6);
162 const size_t subsampling = state.range(7);
163 const size_t dilation = state.range(8);
164 const size_t groups = state.range(9);
165 const size_t group_input_channels = state.range(10);
166 const size_t group_output_channels = state.range(11);
167
168 std::random_device random_device;
169 auto rng = std::mt19937(random_device());
170 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
171 auto i8rng = std::bind(
172 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
173
174 const size_t output_pixel_stride = groups * group_output_channels;
175 const size_t input_pixel_stride = groups * group_input_channels;
176 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
177 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
178 const size_t padding_left = padding_width / 2;
179 const size_t padding_top = padding_height / 2;
180 const size_t padding_right = padding_width - padding_left;
181 const size_t padding_bottom = padding_height - padding_top;
182 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
183 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
184
185 std::vector<int8_t> input(batch_size * input_height * input_width * input_pixel_stride);
186 std::generate(input.begin(), input.end(), std::ref(i8rng));
187 std::vector<int8_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
188 std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
189 std::vector<int32_t> bias(groups * group_output_channels);
190 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
191 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
192
193 xnn_status status = xnn_initialize(nullptr /* allocator */);
194 if (status != xnn_status_success) {
195 state.SkipWithError("failed to initialize XNNPACK");
196 return;
197 }
198
199 const size_t num_buffers = 1 +
200 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
201 sizeof(int8_t) * kernel.size() + sizeof(int32_t) * bias.size() + sizeof(int8_t) * output_elements);
202 std::vector<int8_t> output(output_elements * num_buffers);
203
204 std::vector<xnn_operator_t> convolution_operators(num_buffers);
205 for (xnn_operator_t& convolution_op : convolution_operators) {
206 status = xnn_create_convolution2d_nhwc_qs8(
207 padding_top, padding_right, padding_bottom, padding_left,
208 kernel_height, kernel_width,
209 subsampling, subsampling,
210 dilation, dilation,
211 groups, group_input_channels, group_output_channels,
212 input_pixel_stride, output_pixel_stride,
213 127, 0.5f, 0.5f,
214 kernel.data(), bias.data(),
215 127, 0.5f, -128, 127,
216 0 /* flags */, NULL, &convolution_op);
217 if (status != xnn_status_success) {
218 state.SkipWithError("failed to create QINT8 Convolution operator");
219 return;
220 }
221 }
222
223 for (size_t i = 0; i < convolution_operators.size(); i++) {
224 status = xnn_setup_convolution2d_nhwc_qs8(
225 convolution_operators[i],
226 batch_size, input_height, input_width,
227 input.data(), output.data() + i * output_elements,
228 nullptr /* thread pool */);
229 if (status != xnn_status_success) {
230 state.SkipWithError("failed to setup QINT8 Convolution operator");
231 return;
232 }
233 }
234
235 size_t buffer_index = 0;
236 for (auto _ : state) {
237 state.PauseTiming();
238 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(int8_t));
239 buffer_index = (buffer_index + 1) % num_buffers;
240 state.ResumeTiming();
241
242 status = xnn_run_operator(convolution_operators[buffer_index],
243 nullptr /* thread pool */);
244 if (status != xnn_status_success) {
245 state.SkipWithError("failed to run QINT8 Convolution operator");
246 return;
247 }
248 }
249
250 for (xnn_operator_t& convolution_op : convolution_operators) {
251 status = xnn_delete_operator(convolution_op);
252 if (status != xnn_status_success) {
253 state.SkipWithError("failed to delete QINT8 Convolution operator");
254 return;
255 }
256 convolution_op = nullptr;
257 }
258
259 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
260 if (cpu_frequency != 0) {
261 state.counters["cpufreq"] = cpu_frequency;
262 }
263
264 state.counters["OPS"] = benchmark::Counter(
265 uint64_t(state.iterations()) * 2 *
266 batch_size * output_height * output_width *
267 groups * group_input_channels * group_output_channels *
268 kernel_height * kernel_width,
269 benchmark::Counter::kIsRate);
270 }
271 #endif // XNN_NO_QS8_OPERATORS
272
273 #ifndef XNN_NO_F16_OPERATORS
274 void xnnpack_convolution_f16(benchmark::State& state, const char* net) {
275 if (!benchmark::utils::CheckNEONFP16ARITH(state)) {
276 return;
277 }
278 const size_t batch_size = state.range(0);
279 const size_t input_height = state.range(1);
280 const size_t input_width = state.range(2);
281 const size_t kernel_height = state.range(3);
282 const size_t kernel_width = state.range(4);
283 const size_t padding_height = state.range(5);
284 const size_t padding_width = state.range(6);
285 const size_t subsampling = state.range(7);
286 const size_t dilation = state.range(8);
287 const size_t groups = state.range(9);
288 const size_t group_input_channels = state.range(10);
289 const size_t group_output_channels = state.range(11);
290
291 std::random_device random_device;
292 auto rng = std::mt19937(random_device());
293 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));
294 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
295
296 const size_t output_pixel_stride = groups * group_output_channels;
297 const size_t input_pixel_stride = groups * group_input_channels;
298 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
299 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
300 const size_t padding_left = padding_width / 2;
301 const size_t padding_top = padding_height / 2;
302 const size_t padding_right = padding_width - padding_left;
303 const size_t padding_bottom = padding_height - padding_top;
304 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
305 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
306
307 std::vector<uint16_t> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(uint16_t));
308 std::generate(input.begin(), input.end(), std::ref(f16rng));
309 std::vector<uint16_t> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
310 std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
311 std::vector<uint16_t> bias(groups * group_output_channels);
312 std::generate(bias.begin(), bias.end(), std::ref(f16rng));
313 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
314
315 xnn_status status = xnn_initialize(nullptr /* allocator */);
316 if (status != xnn_status_success) {
317 state.SkipWithError("failed to initialize XNNPACK");
318 return;
319 }
320
321 const size_t num_buffers = 1 +
322 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
323 sizeof(uint16_t) * (kernel.size() + bias.size() + output_elements));
324 std::vector<uint16_t> output(output_elements * num_buffers);
325
326 std::vector<xnn_operator_t> convolution_operators(num_buffers);
327 for (xnn_operator_t& convolution_op : convolution_operators) {
328 status = xnn_create_convolution2d_nhwc_f16(
329 padding_top, padding_right, padding_bottom, padding_left,
330 kernel_height, kernel_width,
331 subsampling, subsampling,
332 dilation, dilation,
333 groups, group_input_channels, group_output_channels,
334 input_pixel_stride, output_pixel_stride,
335 kernel.data(), bias.data(),
336 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
337 0 /* flags */, NULL, &convolution_op);
338 if (status != xnn_status_success) {
339 state.SkipWithError("failed to create FP16 Convolution operator");
340 return;
341 }
342 }
343
344 for (size_t i = 0; i < convolution_operators.size(); i++) {
345 status = xnn_setup_convolution2d_nhwc_f16(
346 convolution_operators[i],
347 batch_size, input_height, input_width,
348 input.data(), output.data() + i * output_elements,
349 nullptr /* thread pool */);
350 if (status != xnn_status_success) {
351 state.SkipWithError("failed to setup FP16 Convolution operator");
352 return;
353 }
354 }
355
356 size_t buffer_index = 0;
357 for (auto _ : state) {
358 state.PauseTiming();
359 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(uint16_t));
360 buffer_index = (buffer_index + 1) % num_buffers;
361 state.ResumeTiming();
362
363 status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
364 if (status != xnn_status_success) {
365 state.SkipWithError("failed to run FP16 Convolution operator");
366 return;
367 }
368 }
369
370 for (xnn_operator_t& convolution_op : convolution_operators) {
371 status = xnn_delete_operator(convolution_op);
372 if (status != xnn_status_success) {
373 state.SkipWithError("failed to delete FP16 Convolution operator");
374 return;
375 }
376 convolution_op = nullptr;
377 }
378
379 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
380 if (cpu_frequency != 0) {
381 state.counters["cpufreq"] = cpu_frequency;
382 }
383
384 state.counters["FLOPS"] = benchmark::Counter(
385 uint64_t(state.iterations()) * 2 *
386 batch_size * output_height * output_width *
387 groups * group_input_channels * group_output_channels *
388 kernel_height * kernel_width,
389 benchmark::Counter::kIsRate);
390 }
391 #endif // XNN_NO_F16_OPERATORS
392
393 void xnnpack_convolution_f32(benchmark::State& state, const char* net) {
394 const size_t batch_size = state.range(0);
395 const size_t input_height = state.range(1);
396 const size_t input_width = state.range(2);
397 const size_t kernel_height = state.range(3);
398 const size_t kernel_width = state.range(4);
399 const size_t padding_height = state.range(5);
400 const size_t padding_width = state.range(6);
401 const size_t subsampling = state.range(7);
402 const size_t dilation = state.range(8);
403 const size_t groups = state.range(9);
404 const size_t group_input_channels = state.range(10);
405 const size_t group_output_channels = state.range(11);
406
407 std::random_device random_device;
408 auto rng = std::mt19937(random_device());
409 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
410
411 const size_t output_pixel_stride = groups * group_output_channels;
412 const size_t input_pixel_stride = groups * group_input_channels;
413 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
414 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
415 const size_t padding_left = padding_width / 2;
416 const size_t padding_top = padding_height / 2;
417 const size_t padding_right = padding_width - padding_left;
418 const size_t padding_bottom = padding_height - padding_top;
419 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
420 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
421
422 std::vector<float> input(batch_size * input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(float));
423 std::generate(input.begin(), input.end(), std::ref(f32rng));
424 std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
425 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
426 std::vector<float> bias(groups * group_output_channels);
427 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
428 const size_t output_elements = batch_size * output_height * output_width * output_pixel_stride;
429
430 xnn_status status = xnn_initialize(nullptr /* allocator */);
431 if (status != xnn_status_success) {
432 state.SkipWithError("failed to initialize XNNPACK");
433 return;
434 }
435
436 const size_t num_buffers = 1 +
437 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
438 sizeof(float) * (kernel.size() + bias.size() + output_elements));
439 std::vector<float> output(output_elements * num_buffers);
440
441 std::vector<xnn_operator_t> convolution_operators(num_buffers);
442 for (xnn_operator_t& convolution_op : convolution_operators) {
443 status = xnn_create_convolution2d_nhwc_f32(
444 padding_top, padding_right, padding_bottom, padding_left,
445 kernel_height, kernel_width,
446 subsampling, subsampling,
447 dilation, dilation,
448 groups, group_input_channels, group_output_channels,
449 input_pixel_stride, output_pixel_stride,
450 kernel.data(), bias.data(),
451 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
452 0 /* flags */, NULL, &convolution_op);
453 if (status != xnn_status_success) {
454 state.SkipWithError("failed to create FP32 Convolution operator");
455 return;
456 }
457 }
458
459 for (size_t i = 0; i < convolution_operators.size(); i++) {
460 status = xnn_setup_convolution2d_nhwc_f32(
461 convolution_operators[i],
462 batch_size, input_height, input_width,
463 input.data(), output.data() + i * output_elements,
464 nullptr /* thread pool */);
465 if (status != xnn_status_success) {
466 state.SkipWithError("failed to setup FP32 Convolution operator");
467 return;
468 }
469 }
470
471 size_t buffer_index = 0;
472 for (auto _ : state) {
473 state.PauseTiming();
474 benchmark::utils::PrefetchToL1(input.data(), input.size() * sizeof(float));
475 buffer_index = (buffer_index + 1) % num_buffers;
476 state.ResumeTiming();
477
478 status = xnn_run_operator(convolution_operators[buffer_index], nullptr /* thread pool */);
479 if (status != xnn_status_success) {
480 state.SkipWithError("failed to run FP32 Convolution operator");
481 return;
482 }
483 }
484
485 for (xnn_operator_t& convolution_op : convolution_operators) {
486 status = xnn_delete_operator(convolution_op);
487 if (status != xnn_status_success) {
488 state.SkipWithError("failed to delete FP32 Convolution operator");
489 return;
490 }
491 convolution_op = nullptr;
492 }
493
494 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
495 if (cpu_frequency != 0) {
496 state.counters["cpufreq"] = cpu_frequency;
497 }
498
499 state.counters["FLOPS"] = benchmark::Counter(
500 uint64_t(state.iterations()) * 2 *
501 batch_size * output_height * output_width *
502 groups * group_input_channels * group_output_channels *
503 kernel_height * kernel_width,
504 benchmark::Counter::kIsRate);
505 }
506
507 #ifdef BENCHMARK_TENSORFLOW_LITE
508 void tflite_convolution_f32(benchmark::State& state, const char* net) {
509 const size_t batch_size = state.range(0);
510 const size_t input_height = state.range(1);
511 const size_t input_width = state.range(2);
512 const size_t kernel_height = state.range(3);
513 const size_t kernel_width = state.range(4);
514 const size_t padding_height = state.range(5);
515 const size_t padding_width = state.range(6);
516 const size_t subsampling = state.range(7);
517 const size_t dilation = state.range(8);
518 const size_t groups = state.range(9);
519 const size_t group_input_channels = state.range(10);
520 const size_t group_output_channels = state.range(11);
521
522 bool is_depthwise = false;
523 if (groups != 1) {
524 if (group_input_channels == 1) {
525 is_depthwise = true;
526 } else {
527 state.SkipWithError("grouped convolution is not supported");
528 return;
529 }
530 }
531
532 std::random_device random_device;
533 auto rng = std::mt19937(random_device());
534 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
535
536 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
537 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
538
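  // TensorFlow Lite only exposes SAME/VALID padding, so skip configurations whose explicit
  // padding does not correspond to either mode.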
539 tflite::Padding padding = tflite::Padding_VALID;
540 if (padding_width == (effective_kernel_width - 1) && padding_height == (effective_kernel_height - 1)) {
541 padding = tflite::Padding_SAME;
542 } else if (padding_width == 0 && padding_height == 0) {
543 padding = tflite::Padding_VALID;
544 } else {
545 state.SkipWithError("unsupported padding");
546 return;
547 }
548
549 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
550 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
551
552 std::vector<float> kernel(groups * group_output_channels * kernel_height * kernel_width * group_input_channels);
553 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
554 std::vector<float> bias(groups * group_output_channels);
555 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
556
557 flatbuffers::FlatBufferBuilder builder;
558 flatbuffers::Offset<tflite::OperatorCode> operator_code =
559 CreateOperatorCode(
560 builder,
561 is_depthwise ? tflite::BuiltinOperator_DEPTHWISE_CONV_2D : tflite::BuiltinOperator_CONV_2D,
562 0);
563
564 flatbuffers::Offset<tflite::Conv2DOptions> conv2d_options = CreateConv2DOptions(
565 builder,
566 padding,
567 static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
568 tflite::ActivationFunctionType_NONE,
569 static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));
570
571 flatbuffers::Offset<tflite::DepthwiseConv2DOptions> dwconv2d_options = CreateDepthwiseConv2DOptions(
572 builder,
573 padding,
574 static_cast<int32_t>(subsampling), static_cast<int32_t>(subsampling),
575 static_cast<int32_t>(group_output_channels),
576 tflite::ActivationFunctionType_NONE,
577 static_cast<int32_t>(dilation), static_cast<int32_t>(dilation));
578
579 flatbuffers::Offset<tflite::Buffer> buffers[3] = {
580 tflite::CreateBuffer(builder, builder.CreateVector({})),
581 tflite::CreateBuffer(builder, builder.CreateVector(
582 reinterpret_cast<const uint8_t*>(kernel.data()),
583 sizeof(float) * kernel.size())),
584 tflite::CreateBuffer(builder, builder.CreateVector(
585 reinterpret_cast<const uint8_t*>(bias.data()),
586 sizeof(float) * bias.size())),
587 };
588
589 const int32_t input_shape[4] = {
590 static_cast<int32_t>(batch_size),
591 static_cast<int32_t>(input_height),
592 static_cast<int32_t>(input_width),
593 static_cast<int32_t>(groups * group_input_channels)
594 };
595 const int32_t output_shape[4] = {
596 static_cast<int32_t>(batch_size),
597 static_cast<int32_t>(output_height),
598 static_cast<int32_t>(output_width),
599 static_cast<int32_t>(groups * group_output_channels)
600 };
601 const int32_t filter_shape[4] = {
602 static_cast<int32_t>(group_output_channels),
603 static_cast<int32_t>(kernel_height),
604 static_cast<int32_t>(kernel_width),
605 static_cast<int32_t>(groups * group_input_channels)
606 };
607 const int32_t bias_shape[1] = {
608 static_cast<int32_t>(groups * group_output_channels)
609 };
610
611 flatbuffers::Offset<tflite::Tensor> tensors[4] = {
612 tflite::CreateTensor(builder,
613 builder.CreateVector<int32_t>(input_shape, 4),
614 tflite::TensorType_FLOAT32,
615 0 /* buffer id */,
616 builder.CreateString("input")),
617 tflite::CreateTensor(builder,
618 builder.CreateVector<int32_t>(filter_shape, 4),
619 tflite::TensorType_FLOAT32,
620 1 /* buffer id */,
621 builder.CreateString("filter")),
622 tflite::CreateTensor(builder,
623 builder.CreateVector<int32_t>(bias_shape, 1),
624 tflite::TensorType_FLOAT32,
625 2 /* buffer id */,
626 builder.CreateString("bias")),
627 tflite::CreateTensor(builder,
628 builder.CreateVector<int32_t>(output_shape, 4),
629 tflite::TensorType_FLOAT32,
630 0 /* buffer id */,
631 builder.CreateString("output")),
632 };
633
634 const int32_t op_inputs[3] = { 0, 1, 2 };
635 const int32_t op_outputs[1] = { 3 };
636 flatbuffers::Offset<tflite::Operator> op = CreateOperator(
637 builder,
638 0 /* opcode_index */,
639 builder.CreateVector<int32_t>(op_inputs, 3),
640 builder.CreateVector<int32_t>(op_outputs, 1),
641 is_depthwise ? tflite::BuiltinOptions_DepthwiseConv2DOptions : tflite::BuiltinOptions_Conv2DOptions,
642 is_depthwise ? dwconv2d_options.Union() : conv2d_options.Union(),
643 /*custom_options */ 0,
644 tflite::CustomOptionsFormat_FLEXBUFFERS);
645
646 const int32_t graph_inputs[1] = { 0 };
647 const int32_t graph_outputs[1] = { 3 };
648 flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
649 builder,
650 builder.CreateVector(tensors, 4),
651 builder.CreateVector<int32_t>(graph_inputs, 1),
652 builder.CreateVector<int32_t>(graph_outputs, 1),
653 builder.CreateVector(&op, 1),
654 builder.CreateString("Conv2D subgraph"));
655
656 flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("Conv2D model");
657
658 flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
659 TFLITE_SCHEMA_VERSION,
660 builder.CreateVector(&operator_code, 1),
661 builder.CreateVector(&subgraph, 1),
662 description,
663 builder.CreateVector(buffers, 3));
664
665 builder.Finish(model_buffer);
666
667 const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
668 tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
669 tflite::InterpreterBuilder interpreterBuilder(model, resolver);
670 std::unique_ptr<tflite::Interpreter> interpreter;
671 if (interpreterBuilder(&interpreter) != kTfLiteOk) {
672 state.SkipWithError("failed to create TFLite interpreter");
673 return;
674 }
675 if (interpreter == nullptr) {
676 state.SkipWithError("TFLite interpreter is null");
677 return;
678 }
679 interpreter->SetNumThreads(1);
680
681 if (interpreter->AllocateTensors() != kTfLiteOk) {
682 state.SkipWithError("failed to allocate tensors");
683 return;
684 }
685
686 std::generate(
687 interpreter->typed_tensor<float>(0),
688 interpreter->typed_tensor<float>(0) + batch_size * groups * group_input_channels * input_height * input_width,
689 std::ref(f32rng));
690
691 for (auto _ : state) {
692 state.PauseTiming();
693 benchmark::utils::WipeCache();
694 benchmark::utils::PrefetchToL1(
695 interpreter->typed_tensor<float>(0),
696 batch_size * groups * group_input_channels * input_height * input_width * sizeof(float));
697 state.ResumeTiming();
698
699 if (interpreter->Invoke() != kTfLiteOk) {
700 state.SkipWithError("failed to invoke TFLite interpreter");
701 return;
702 }
703 }
704
705 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
706 if (cpu_frequency != 0) {
707 state.counters["cpufreq"] = cpu_frequency;
708 }
709
710 state.counters["FLOPS"] = benchmark::Counter(
711 uint64_t(state.iterations()) * 2 *
712 batch_size * output_height * output_width *
713 groups * group_input_channels * group_output_channels *
714 kernel_height * kernel_width,
715 benchmark::Counter::kIsRate);
716
717 interpreter.reset();
718 }
719 #endif // BENCHMARK_TENSORFLOW_LITE
720
721 // ShuffleNet v1 with 1 group.
722 static void ShuffleNetV1G1(benchmark::internal::Benchmark* b) {
723 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
724
725 /*************************** Conv 1 **************************/
726 /* N H W KH KW PH PW S D G GCin GCout */
727 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
728 /******************* Stage 2: stride-2 unit ******************/
729 /* N H W KH KW PH PW S D G GCin GCout */
730 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 36});
731 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 36, 1, 1});
732 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 120});
733 /******************* Stage 2: stride-1 units *****************/
734 /* N H W KH KW PH PW S D G GCin GCout */
735 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 36});
736 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 36, 1, 1});
737 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 36, 144});
738 /******************* Stage 3: stride-2 unit ******************/
739 /* N H W KH KW PH PW S D G GCin GCout */
740 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 72});
741 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 72, 1, 1});
742 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 144});
743 /******************* Stage 3: stride-1 units *****************/
744 /* N H W KH KW PH PW S D G GCin GCout */
745 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 72});
746 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 72, 1, 1});
747 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 72, 288});
748 /******************* Stage 4: stride-2 unit ******************/
749 /* N H W KH KW PH PW S D G GCin GCout */
750 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 288, 144});
751 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 144, 1, 1});
752 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 288});
753 /******************* Stage 4: stride-1 units *****************/
754 /* N H W KH KW PH PW S D G GCin GCout */
755 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 144});
756 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 144, 1, 1});
757 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 144, 576});
758 }
759
760 // ShuffleNet v1 with 2 groups.
761 static void ShuffleNetV1G2(benchmark::internal::Benchmark* b) {
762 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
763
764 /*************************** Conv 1 **************************/
765 /* N H W KH KW PH PW S D G GCin GCout */
766 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
767 /******************* Stage 2: stride-2 unit ******************/
768 /* N H W KH KW PH PW S D G GCin GCout */
769 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 50});
770 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 50, 1, 1});
771 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 88});
772 /******************* Stage 2: stride-1 units *****************/
773 /* N H W KH KW PH PW S D G GCin GCout */
774 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 25});
775 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 50, 1, 1});
776 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 25, 100});
777 /******************* Stage 3: stride-2 unit ******************/
778 /* N H W KH KW PH PW S D G GCin GCout */
779 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 2, 100, 50});
780 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 100, 1, 1});
781 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 100});
782 /******************* Stage 3: stride-1 units *****************/
783 /* N H W KH KW PH PW S D G GCin GCout */
784 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 50});
785 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 100, 1, 1});
786 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 50, 200});
787 /******************* Stage 4: stride-2 unit ******************/
788 /* N H W KH KW PH PW S D G GCin GCout */
789 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 2, 200, 100});
790 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 200, 1, 1});
791 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 200});
792 /******************* Stage 4: stride-1 units *****************/
793 /* N H W KH KW PH PW S D G GCin GCout */
794 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 400, 100});
795 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 200, 1, 1});
796 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 2, 100, 400});
797 }
798
799 // ShuffleNet v1 with 3 groups.
800 static void ShuffleNetV1G3(benchmark::internal::Benchmark* b) {
801 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
802
803 /*************************** Conv 1 **************************/
804 /* N H W KH KW PH PW S D G GCin GCout */
805 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
806 /******************* Stage 2: stride-2 unit ******************/
807 /* N H W KH KW PH PW S D G GCin GCout */
808 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 60});
809 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 60, 1, 1});
810 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 72});
811 /******************* Stage 2: stride-1 units *****************/
812 /* N H W KH KW PH PW S D G GCin GCout */
813 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 20});
814 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 60, 1, 1});
815 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 20, 80});
816 /******************* Stage 3: stride-2 unit ******************/
817 /* N H W KH KW PH PW S D G GCin GCout */
818 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 3, 80, 40});
819 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 120, 1, 1});
820 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 80});
821 /******************* Stage 3: stride-1 units *****************/
822 /* N H W KH KW PH PW S D G GCin GCout */
823 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 40});
824 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 120, 1, 1});
825 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 40, 160});
826 /******************* Stage 4: stride-2 unit ******************/
827 /* N H W KH KW PH PW S D G GCin GCout */
828 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 3, 160, 80});
829 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 240, 1, 1});
830 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 160});
831 /******************* Stage 4: stride-1 units *****************/
832 /* N H W KH KW PH PW S D G GCin GCout */
833 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 320, 80});
834 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 240, 1, 1});
835 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 3, 80, 320});
836 }
837
838 // ShuffleNet v1 with 4 groups.
839 static void ShuffleNetV1G4(benchmark::internal::Benchmark* b) {
840 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
841
842 /*************************** Conv 1 **************************/
843 /* N H W KH KW PH PW S D G GCin GCout */
844 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
845 /******************* Stage 2: stride-2 unit ******************/
846 /* N H W KH KW PH PW S D G GCin GCout */
847 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 68});
848 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 68, 1, 1});
849 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 62});
850 /******************* Stage 2: stride-1 units *****************/
851 /* N H W KH KW PH PW S D G GCin GCout */
852 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 17});
853 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 68, 1, 1});
854 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 17, 68});
855 /******************* Stage 3: stride-2 unit ******************/
856 /* N H W KH KW PH PW S D G GCin GCout */
857 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 4, 68, 34});
858 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 136, 1, 1});
859 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 68});
860 /******************* Stage 3: stride-1 units *****************/
861 /* N H W KH KW PH PW S D G GCin GCout */
862 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 34});
863 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 136, 1, 1});
864 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 34, 136});
865 /******************* Stage 4: stride-2 unit ******************/
866 /* N H W KH KW PH PW S D G GCin GCout */
867 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 4, 136, 68});
868 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 272, 1, 1});
869 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 136});
870 /******************* Stage 4: stride-1 units *****************/
871 /* N H W KH KW PH PW S D G GCin GCout */
872 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 272, 68});
873 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 272, 1, 1});
874 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 4, 68, 272});
875 }
876
877 // ShuffleNet v1 with 8 groups.
878 static void ShuffleNetV1G8(benchmark::internal::Benchmark* b) {
879 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
880
881 /*************************** Conv 1 **************************/
882 /* N H W KH KW PH PW S D G GCin GCout */
883 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
884 /******************* Stage 2: stride-2 unit ******************/
885 /* N H W KH KW PH PW S D G GCin GCout */
886 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 96});
887 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 96, 1, 1});
888 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 45});
889 /******************* Stage 2: stride-1 units *****************/
890 /* N H W KH KW PH PW S D G GCin GCout */
891 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 12});
892 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 96, 1, 1});
893 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 12, 48});
894 /******************* Stage 3: stride-2 unit ******************/
895 /* N H W KH KW PH PW S D G GCin GCout */
896 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 8, 48, 24});
897 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
898 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 48});
899 /******************* Stage 3: stride-1 units *****************/
900 /* N H W KH KW PH PW S D G GCin GCout */
901 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 24});
902 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 192, 1, 1});
903 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 24, 96});
904 /******************* Stage 4: stride-2 unit ******************/
905 /* N H W KH KW PH PW S D G GCin GCout */
906 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 8, 96, 48});
907 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 384, 1, 1});
908 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 96});
909 /******************* Stage 4: stride-1 units *****************/
910 /* N H W KH KW PH PW S D G GCin GCout */
911 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 192, 48});
912 b->Args({1, 7, 7, 3, 3, 2, 2, 2, 1, 384, 1, 1});
913 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 8, 48, 192});
914 }
915
916 // ShuffleNet v2 (0.5X scale)
917 static void ShuffleNetV2X05(benchmark::internal::Benchmark* b) {
918 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
919
920 /*************************** Conv 1 **************************/
921 /* N H W KH KW PH PW S D G GCin GCout */
922 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
923 /************************** Stage 2 **************************/
924 /* N H W KH KW PH PW S D G GCin GCout */
925 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
926 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 24});
927 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 24});
928 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 24, 1, 1});
929 /************************** Stage 3 **************************/
930 /* N H W KH KW PH PW S D G GCin GCout */
931 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 48, 1, 1});
932 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 48});
933 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 48, 48});
934 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 48, 1, 1});
935 /************************** Stage 4 **************************/
936 /* N H W KH KW PH PW S D G GCin GCout */
937 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 96, 1, 1});
938 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 96});
939 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 96});
940 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 96, 1, 1});
941 /*************************** Conv 5 **************************/
942 /* N H W KH KW PH PW S D G GCin GCout */
943 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 192, 1024});
944 }
945
946 // ShuffleNet v2 (1.0X scale)
947 static void ShuffleNetV2X10(benchmark::internal::Benchmark* b) {
948 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
949
950 /*************************** Conv 1 **************************/
951 /* N H W KH KW PH PW S D G GCin GCout */
952 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
953 /************************** Stage 2 **************************/
954 /* N H W KH KW PH PW S D G GCin GCout */
955 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
956 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 58});
957 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 58});
958 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 58, 1, 1});
959 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 58, 58});
960 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 58, 1, 1});
961 /************************** Stage 3 **************************/
962 /* N H W KH KW PH PW S D G GCin GCout */
963 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 116, 1, 1});
964 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 116, 116});
965 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 116, 116});
966 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 116, 1, 1});
967 /************************** Stage 4 **************************/
968 /* N H W KH KW PH PW S D G GCin GCout */
969 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 232, 1, 1});
970 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 232, 232});
971 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 232, 232});
972 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 232, 1, 1});
973 /*************************** Conv 5 **************************/
974 /* N H W KH KW PH PW S D G GCin GCout */
975 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 464, 1024});
976 }
977
978 // ShuffleNet v2 (1.5X scale)
979 static void ShuffleNetV2X15(benchmark::internal::Benchmark* b) {
980 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
981
982 /*************************** Conv 1 **************************/
983 /* N H W KH KW PH PW S D G GCin GCout */
984 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
985 /************************** Stage 2 **************************/
986 /* N H W KH KW PH PW S D G GCin GCout */
987 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
988 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
989 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 88});
990 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 88, 1, 1});
991 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 88});
992 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
993 /************************** Stage 3 **************************/
994 /* N H W KH KW PH PW S D G GCin GCout */
995 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 176, 1, 1});
996 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 176, 176});
997 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 176, 176});
998 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 176, 1, 1});
999 /************************** Stage 4 **************************/
1000 /* N H W KH KW PH PW S D G GCin GCout */
1001 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 352, 1, 1});
1002 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1003 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 352, 352});
1004 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 352, 1, 1});
1005 /*************************** Conv 5 **************************/
1006 /* N H W KH KW PH PW S D G GCin GCout */
1007 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 704, 1024});
1008 }
1009
1010 // ShuffleNet v2 (2.0X scale)
1011 static void ShuffleNetV2X20(benchmark::internal::Benchmark* b) {
1012 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1013
1014 /*************************** Conv 1 **************************/
1015 /* N H W KH KW PH PW S D G GCin GCout */
1016 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 24});
1017 /************************** Stage 2 **************************/
1018 /* N H W KH KW PH PW S D G GCin GCout */
1019 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 24, 1, 1});
1020 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1021 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 122});
1022 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 122, 1, 1});
1023 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 122, 122});
1024 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 122, 1, 1});
1025 /************************** Stage 3 **************************/
1026 /* N H W KH KW PH PW S D G GCin GCout */
1027 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 244, 1, 1});
1028 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1029 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 244, 244});
1030 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 244, 1, 1});
1031 /************************** Stage 4 **************************/
1032 /* N H W KH KW PH PW S D G GCin GCout */
1033 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 488, 1, 1});
1034 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1035 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 488, 488});
1036 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 488, 1, 1});
1037 /*************************** Conv 5 **************************/
1038 /* N H W KH KW PH PW S D G GCin GCout */
1039 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 976, 2048});
1040 }
1041
1042 static void MobileNetV1(benchmark::internal::Benchmark* b) {
1043 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1044
1045 /* N H W KH KW PH PW S D G GCin GCout */
1046 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1047 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1048 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 64});
1049 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
1050 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 128});
1051 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 128, 1, 1});
1052 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 128, 128});
1053 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 128, 1, 1});
1054 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 256});
1055 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 256, 1, 1});
1056 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 256, 256});
1057 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 256, 1, 1});
1058 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 512});
1059 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 512, 1, 1});
1060 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
1061 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 512, 1, 1});
1062 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 1024});
1063 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1024, 1, 1});
1064 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 1024, 1024});
1065 }
1066
1067 static void MobileNetV2(benchmark::internal::Benchmark* b) {
1068 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1069
1070 /* N H W KH KW PH PW S D G GCin GCout */
1071 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 32});
1072
1073 /************************ Bottleneck 1 ***********************/
1074 /* N H W KH KW PH PW S D G GCin GCout */
1075 b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 32, 1, 1});
1076 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 32, 16});
1077
1078 /************************ Bottleneck 2 ***********************/
1079 /* N H W KH KW PH PW S D G GCin GCout */
1080 b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 96});
1081 b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 96, 1, 1});
1082 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 96, 24});
1083 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1084 b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 144, 1, 1});
1085 b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 144, 24});
1086
1087 /************************ Bottleneck 3 ***********************/
1088 /* N H W KH KW PH PW S D G GCin GCout */
1089 //b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 144});
1090 b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 144, 1, 1});
1091 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 144, 32});
1092 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1093 b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1094 b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1095 //b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1096 //b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 192, 1, 1});
1097 //b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 192, 32});
1098
1099 /************************ Bottleneck 4 ***********************/
1100 /* N H W KH KW PH PW S D G GCin GCout */
1101 //b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 32, 192});
1102 b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 192, 1, 1});
1103 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 192, 64});
1104 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1105 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1106 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1107 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1108 //b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1109 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1110 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1111 //b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1112 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 64});
1113
1114 /************************ Bottleneck 5 ***********************/
1115 /* N H W KH KW PH PW S D G GCin GCout */
1116 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 64, 384});
1117 //b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 384, 1, 1});
1118 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 384, 96});
1119 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1120 b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1121 b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1122 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1123 //b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 576, 1, 1});
1124 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 576, 96});
1125
1126 /************************ Bottleneck 6 ***********************/
1127 /* N H W KH KW PH PW S D G GCin GCout */
1128 //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 576});
1129 b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 576, 1, 1});
1130 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 160});
1131 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1132 b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1133 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1134 //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1135 //b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1136 //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
1137
1138 /************************ Bottleneck 7 ***********************/
1139 /* N H W KH KW PH PW S D G GCin GCout */
1140 //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
1141 //b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 960, 1, 1});
1142 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 320});
1143
1144 /******************** Pre-pooling Conv2D *********************/
1145 /* N H W KH KW PH PW S D G GCin GCout */
1146 b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 320, 1280});
1147 /******************** Post-pooling Conv2D ********************/
1148 /* N H W KH KW PH PW S D G GCin GCout */
1149 b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1000});
1150 }
1151
1152 static void MobileNetV3Small(benchmark::internal::Benchmark* b) {
1153 b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});
1154
1155 /*********************** Initial Stage ***********************/
1156 /* N H W KH KW PH PW S D G GCin GCout */
1157 b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
1158 /*********************** Bottleneck 1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 16, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 16, 8});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 8, 16});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 16});
  /*********************** Bottleneck 2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 16, 72});
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 72, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  /*********************** Bottleneck 3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 88});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 88, 1, 1});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 88, 24});
  /*********************** Bottleneck 4 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 24, 96});
  b->Args({1, 28, 28, 5, 5, 4, 4, 2, 1, 96, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 96, 24});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 96});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 96, 40});
  /*********************** Bottleneck 5 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
  /*********************** Bottleneck 6 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 240});
  //b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 240, 1, 1});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 64});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 64, 240});
  //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 40});
  /*********************** Bottleneck 7 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 40, 120});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 120, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 120, 48});
  /*********************** Bottleneck 8 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 144});
  b->Args({1, 14, 14, 5, 5, 4, 4, 1, 1, 144, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 40});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 40, 144});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 144, 48});
  /*********************** Bottleneck 9 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 48, 288});
  b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 288, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 288, 72});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 288});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 288, 96});
  /*********************** Bottleneck 10 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
  /*********************** Bottleneck 11 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  //b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 576, 1, 1});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 144});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 144, 576});
  //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 576, 96});
  /************************ Last Stage ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 96, 576});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 576, 1024});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1024, 1001});
}

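// MobileNet v3 Large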
static void MobileNetV3Large(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /*********************** Initial Stage ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 16});
  /*********************** Bottleneck 1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 16, 1, 1});
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 16});
  /*********************** Bottleneck 2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 112, 112, 3, 3, 2, 2, 2, 1, 64, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 24});
  /*********************** Bottleneck 3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 72, 1, 1});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  /*********************** Bottleneck 4 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 56, 56, 5, 5, 4, 4, 2, 1, 72, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 72, 24});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 24, 72});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 72, 40});
  /*********************** Bottleneck 5 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
  b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
  /*********************** Bottleneck 6 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 120});
  //b->Args({1, 28, 28, 5, 5, 4, 4, 1, 1, 120, 1, 1});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 32});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 32, 120});
  //b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 120, 40});
  /*********************** Bottleneck 7 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 40, 240});
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 240, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 240, 80});
  /*********************** Bottleneck 8 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 200});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 200, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 200, 80});
  /*********************** Bottleneck 9 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
  /********************** Bottleneck 10 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 184});
  //b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 184, 1, 1});
  //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 184, 80});
  /********************** Bottleneck 11 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 80, 480});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 480, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 480, 120});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 120, 480});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 480, 112});
  /********************** Bottleneck 12 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 672, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 672, 168});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 168, 672});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 672, 112});
  /********************** Bottleneck 13 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 112, 672});
  b->Args({1, 14, 14, 5, 5, 4, 4, 2, 1, 672, 1, 1});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 672, 160});
  /********************** Bottleneck 14 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
  /********************** Bottleneck 15 ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  //b->Args({1, 7, 7, 5, 5, 4, 4, 1, 1, 960, 1, 1});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 240});
  //b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 240, 960});
  //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 960, 160});
  /************************ Last Stage ***********************/
  /* N H W KH KW PH PW S D G GCin GCout */
  //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 160, 960});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 960, 1280});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1280, 1001});
}

// SqueezeNet 1.0
static void SqueezeNetV10(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************** Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 96});
  /************************** Fire 2 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 96, 16});
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 3 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
  //b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  //b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 4 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 32});
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 5 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 6 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 7 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 48});
  //b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 48, 192});
  //b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 8 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 384, 64});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************** Fire 9 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************* Conv 10 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
}

// SqueezeNet 1.1
static void SqueezeNetV11(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************** Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 2, 1, 1, 3, 64});
  /************************** Fire 2 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 64, 16});
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 3 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 128, 16});
  //b->Args({1, 55, 55, 1, 1, 0, 0, 1, 1, 1, 16, 64});
  //b->Args({1, 55, 55, 3, 3, 2, 2, 1, 1, 1, 16, 64});
  /************************** Fire 4 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 128, 32});
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 5 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 256, 32});
  //b->Args({1, 27, 27, 1, 1, 0, 0, 1, 1, 1, 32, 128});
  //b->Args({1, 27, 27, 3, 3, 2, 2, 1, 1, 1, 32, 128});
  /************************** Fire 6 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 7 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 48});
  //b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 48, 192});
  //b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 48, 192});
  /************************** Fire 8 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 384, 64});
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************** Fire 9 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 64});
  //b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  //b->Args({1, 13, 13, 3, 3, 2, 2, 1, 1, 1, 64, 256});
  /************************* Conv 10 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 13, 13, 1, 1, 0, 0, 1, 1, 1, 512, 1000});
}

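// Inception v3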
static void InceptionV3(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 299, 299, 3, 3, 0, 0, 2, 1, 1, 3, 32});
  b->Args({1, 149, 149, 3, 3, 0, 0, 1, 1, 1, 32, 32});
  b->Args({1, 147, 147, 3, 3, 2, 2, 1, 1, 1, 32, 64});
  b->Args({1, 73, 73, 1, 1, 0, 0, 1, 1, 1, 64, 80});
  b->Args({1, 73, 73, 3, 3, 0, 0, 1, 1, 1, 80, 192});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 48});
  b->Args({1, 35, 35, 5, 5, 4, 4, 1, 1, 1, 48, 64});
  b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 64, 96});
  b->Args({1, 35, 35, 3, 3, 2, 2, 1, 1, 1, 96, 96});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 192, 32});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 256, 48});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 64});
  b->Args({1, 35, 35, 1, 1, 0, 0, 1, 1, 1, 288, 48});
  b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 288, 384});
  b->Args({1, 35, 35, 3, 3, 0, 0, 2, 1, 1, 96, 96});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 192});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 128});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 128});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 128, 128});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 128, 192});
  b->Args({1, 17, 17, 1, 1, 0, 0, 1, 1, 1, 768, 160});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 160});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 160, 160});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 160, 192});
  b->Args({1, 17, 17, 1, 7, 0, 6, 1, 1, 1, 192, 192});
  b->Args({1, 17, 17, 7, 1, 6, 0, 1, 1, 1, 192, 192});
  b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 320});
  b->Args({1, 17, 17, 3, 3, 0, 0, 2, 1, 1, 192, 192});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 320});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 384});
  b->Args({1, 8, 8, 1, 3, 0, 2, 1, 1, 1, 384, 384});
  b->Args({1, 8, 8, 3, 1, 2, 0, 1, 1, 1, 384, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 448});
  b->Args({1, 8, 8, 3, 3, 2, 2, 1, 1, 1, 448, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 1280, 192});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 320});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 384});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 448});
  b->Args({1, 8, 8, 1, 1, 0, 0, 1, 1, 1, 2048, 192});
  b->Args({1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2048, 1001});
}

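// ResNet-18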
static void ResNet18(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
  /************************ Conv 2.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
  /************************ Conv 3.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 64, 128});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
  b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 64, 128});
  /************************ Conv 4.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 128, 256});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
  b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 128, 256});
  /************************ Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 256, 512});
  b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 256, 512});
}

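// ResNet-50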
static void ResNet50(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1 *************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 7, 7, 6, 6, 2, 1, 1, 3, 64});
  /************************ Conv 2.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 64});
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  //b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  /************************ Conv 2.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 64});
  //b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 64, 64});
  //b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 64, 256});
  /************************ Conv 3.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 128});
  b->Args({1, 56, 56, 3, 3, 2, 2, 2, 1, 1, 128, 128});
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
  b->Args({1, 56, 56, 1, 1, 0, 0, 2, 1, 1, 256, 512});
  /************************ Conv 3.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 128});
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 128, 128});
  //b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 128, 512});
  /************************ Conv 4.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 256});
  b->Args({1, 28, 28, 3, 3, 2, 2, 2, 1, 1, 256, 256});
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
  b->Args({1, 28, 28, 1, 1, 0, 0, 2, 1, 1, 512, 1024});
  /************************ Conv 4.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 256});
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 256, 256});
  //b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 256, 1024});
  /************************ Conv 5.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 1024, 512});
  b->Args({1, 14, 14, 3, 3, 2, 2, 2, 1, 1, 512, 512});
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
  b->Args({1, 14, 14, 1, 1, 0, 0, 2, 1, 1, 1024, 2048});
  /************************ Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 2048, 512});
  b->Args({1, 7, 7, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  //b->Args({1, 7, 7, 1, 1, 0, 0, 1, 1, 1, 512, 2048});
}

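// VGG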
static void VGG(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /************************* Conv 1.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 3, 64});
  /************************* Conv 1.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 224, 224, 3, 3, 2, 2, 1, 1, 1, 64, 64});

  /************************* Conv 2.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 64, 128});
  /************************* Conv 2.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 112, 112, 3, 3, 2, 2, 1, 1, 1, 128, 128});

  /************************* Conv 3.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 128, 256});
  /************************* Conv 3.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 3, 3, 2, 2, 1, 1, 1, 256, 256});
  /************************* Conv 3.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 56, 56, 1, 1, 0, 0, 1, 1, 1, 256, 256});

  /************************* Conv 4.1 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 256, 512});
  /************************* Conv 4.2 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  /************************* Conv 4.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 28, 28, 1, 1, 0, 0, 1, 1, 1, 512, 512});

  /************************* Conv 5.X ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 3, 3, 2, 2, 1, 1, 1, 512, 512});
  /************************* Conv 5.3 ************************/
  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 14, 14, 1, 1, 0, 0, 1, 1, 1, 512, 512});
}

// SRCNN (9-1-5)
static void SRCNN915(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
  b->Args({1, 376, 376, 1, 1, 0, 0, 1, 1, 1, 64, 32});
  b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 32, 1});
}

// SRCNN (9-3-5)
static void SRCNN935(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
  b->Args({1, 376, 376, 3, 3, 0, 0, 1, 1, 1, 64, 32});
  b->Args({1, 374, 374, 5, 5, 0, 0, 1, 1, 1, 32, 1});
}

// SRCNN (9-5-5)
static void SRCNN955(benchmark::internal::Benchmark* b) {
  b->ArgNames({"N", "H", "W", "KH", "KW", "PH", "PW", "S", "D", "G", "GCin", "GCout"});

  /* N H W KH KW PH PW S D G GCin GCout */
  b->Args({1, 384, 384, 9, 9, 0, 0, 1, 1, 1, 1, 64});
  b->Args({1, 376, 376, 5, 5, 0, 0, 1, 1, 1, 64, 32});
  b->Args({1, 372, 372, 5, 5, 0, 0, 1, 1, 1, 32, 1});
}

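// Each BENCHMARK_CAPTURE below registers one benchmark per data type (f16, f32,
// qs8, qu8, and TFLite f32 when BENCHMARK_TENSORFLOW_LITE is defined) and feeds
// it a model's per-layer convolution shapes through the Args() lists above.
// Benchmark names are derived from the function and capture name, for example
// "xnnpack_convolution_f32/mobilenet_v2/...", so a single model and precision
// can be selected at run time with Google Benchmark's standard filter flag,
// e.g. --benchmark_filter=xnnpack_convolution_f32/mobilenet_v2.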
#ifndef XNN_NO_F16_OPERATORS
BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f16, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif // XNN_NO_F16_OPERATORS

#ifndef XNN_NO_F32_OPERATORS
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif // XNN_NO_F32_OPERATORS

#ifndef XNN_NO_QS8_OPERATORS
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qs8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif // XNN_NO_QS8_OPERATORS

#ifndef XNN_NO_QU8_OPERATORS
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(xnnpack_convolution_qu8, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif // XNN_NO_QU8_OPERATORS

#ifdef BENCHMARK_TENSORFLOW_LITE
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v1, "MobileNet v1")->Apply(MobileNetV1)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v2, "MobileNet v2")->Apply(MobileNetV2)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_small, "MobileNet v3 Small")->Apply(MobileNetV3Small)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, mobilenet_v3_large, "MobileNet v3 Large")->Apply(MobileNetV3Large)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x05, "ShuffleNet v2 0.5X")->Apply(ShuffleNetV2X05)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x10, "ShuffleNet v2 1.0X")->Apply(ShuffleNetV2X10)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x15, "ShuffleNet v2 1.5X")->Apply(ShuffleNetV2X15)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, shufflenet_v2_x20, "ShuffleNet v2 2.0X")->Apply(ShuffleNetV2X20)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v10, "SqueezeNet 1.0")->Apply(SqueezeNetV10)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, squeezenet_v11, "SqueezeNet 1.1")->Apply(SqueezeNetV11)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, inception_v3, "Inception v3")->Apply(InceptionV3)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, resnet18, "ResNet-18")->Apply(ResNet18)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, resnet50, "ResNet-50")->Apply(ResNet50)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, vgg, "VGG")->Apply(VGG)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn915, "SRCNN (9-1-5)")->Apply(SRCNN915)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn935, "SRCNN (9-3-5)")->Apply(SRCNN935)->UseRealTime();
BENCHMARK_CAPTURE(tflite_convolution_f32, srcnn955, "SRCNN (9-5-5)")->Apply(SRCNN955)->UseRealTime();
#endif // BENCHMARK_TENSORFLOW_LITE

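// BENCHMARK_MAIN() supplies the benchmark entry point; define
// XNNPACK_BENCHMARK_NO_MAIN when this file is linked into a harness
// that provides its own main().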
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif