1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11
12 #include <xnnpack.h>
13
14 #include <benchmark/benchmark.h>
15 #include "bench/end2end.h"
16 #include "bench/utils.h"
17 #include "models/models.h"
18
19 #include <xnnpack.h>
20 #include <xnnpack/dwconv.h>
21 #include <xnnpack/microfnptr.h>
22 #include <xnnpack/microparams-init.h>
23
24
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_qu8_conv_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)25 static void DWConvEnd2EndBenchmark(
26 benchmark::State& state,
27 models::ExecutionPlanFactory model_factory,
28 xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,
29 xnn_init_qu8_conv_minmax_params_fn init_params,
30 uint8_t channel_tile, uint8_t primary_tile,
31 benchmark::utils::IsaCheckFunction isa_check = nullptr)
32 {
33 if (isa_check && !isa_check(state)) {
34 return;
35 }
36 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
37 state.SkipWithError("failed to initialize XNNPACK");
38 return;
39 }
40
41 // Override microkernels chosen in xnn_initialize
42 for (size_t i = 0; i < XNN_MAX_QU8_DWCONV_UKERNELS; i++) {
43 // Replace only the microkernel the matching kernel size.
44 if (xnn_params.qu8.dwconv[i].primary_tile == primary_tile) {
45 // Note: do not directly assign to xnn_params.qu8.dwconv[i] because it breaks older gcc.
46 xnn_params.qu8.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
47 xnn_params.qu8.dwconv[i].channel_tile = channel_tile;
48 xnn_params.qu8.dwconv[i].primary_tile = primary_tile;
49 xnn_params.qu8.dwconv[i].incremental_tile = 0;
50 xnn_params.qu8.dwconv[i].init.qu8 = init_params;
51 break;
52 }
53 }
54
55 auto execution_plan = model_factory(nullptr);
56 if (execution_plan.empty()) {
57 state.SkipWithError("failed to create a model");
58 return;
59 }
60
61 for (auto _ : state) {
62 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
63 xnn_status status = xnn_run_operator(op.get(), nullptr);
64 if (status != xnn_status_success) {
65 state.SkipWithError("failed to run a model");
66 return;
67 }
68 }
69 }
70
71 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
72 if (cpu_frequency != 0) {
73 state.counters["cpufreq"] = cpu_frequency;
74 }
75 }
76
77
78 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
qu8_dwconv_up8x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)79 static void qu8_dwconv_up8x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
80 DWConvEnd2EndBenchmark(state, model,
81 xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
82 xnn_init_qu8_conv_minmax_rndnu_neon_params,
83 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
84 }
qu8_dwconv_up16x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)85 static void qu8_dwconv_up16x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
86 DWConvEnd2EndBenchmark(state, model,
87 xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8,
88 xnn_init_qu8_conv_minmax_rndnu_neon_params,
89 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
90 }
qu8_dwconv_up24x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)91 static void qu8_dwconv_up24x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
92 DWConvEnd2EndBenchmark(state, model,
93 xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
94 xnn_init_qu8_conv_minmax_rndnu_neon_params,
95 24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
96 }
qu8_dwconv_up32x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)97 static void qu8_dwconv_up32x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
98 DWConvEnd2EndBenchmark(state, model,
99 xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
100 xnn_init_qu8_conv_minmax_rndnu_neon_params,
101 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
102 }
qu8_dwconv_up8x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)103 static void qu8_dwconv_up8x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
104 DWConvEnd2EndBenchmark(state, model,
105 xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
106 xnn_init_qu8_conv_minmax_rndnu_neon_params,
107 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
108 }
qu8_dwconv_up16x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)109 static void qu8_dwconv_up16x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
110 DWConvEnd2EndBenchmark(state, model,
111 xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
112 xnn_init_qu8_conv_minmax_rndnu_neon_params,
113 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
114 }
qu8_dwconv_up24x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)115 static void qu8_dwconv_up24x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
116 DWConvEnd2EndBenchmark(state, model,
117 xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
118 xnn_init_qu8_conv_minmax_rndnu_neon_params,
119 24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
120 }
qu8_dwconv_up32x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)121 static void qu8_dwconv_up32x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
122 DWConvEnd2EndBenchmark(state, model,
123 xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
124 xnn_init_qu8_conv_minmax_rndnu_neon_params,
125 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
126 }
127
128 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul8);
129 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul8);
130 BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul8);
131 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul8);
132 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul16);
133 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul16);
134 BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul16);
135 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul16);
136 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
137
138
139 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)140 static void qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
141 DWConvEnd2EndBenchmark(state, model,
142 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
143 xnn_init_qu8_conv_minmax_fp32_avx512_params,
144 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
145 }
qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)146 static void qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
147 DWConvEnd2EndBenchmark(state, model,
148 xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
149 xnn_init_qu8_conv_minmax_fp32_avx512_params,
150 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
151 }
qu8_dwconv_up8x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)152 static void qu8_dwconv_up8x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
153 DWConvEnd2EndBenchmark(state, model,
154 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
155 xnn_init_qu8_conv_minmax_fp32_avx2_params,
156 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
157 }
qu8_dwconv_up16x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)158 static void qu8_dwconv_up16x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
159 DWConvEnd2EndBenchmark(state, model,
160 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
161 xnn_init_qu8_conv_minmax_fp32_avx2_params,
162 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
163 }
qu8_dwconv_up32x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)164 static void qu8_dwconv_up32x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
165 DWConvEnd2EndBenchmark(state, model,
166 xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
167 xnn_init_qu8_conv_minmax_fp32_avx2_params,
168 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
169 }
qu8_dwconv_up8x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)170 static void qu8_dwconv_up8x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
171 DWConvEnd2EndBenchmark(state, model,
172 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
173 xnn_init_qu8_conv_minmax_fp32_sse2_params,
174 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
175 }
qu8_dwconv_up16x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)176 static void qu8_dwconv_up16x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
177 DWConvEnd2EndBenchmark(state, model,
178 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
179 xnn_init_qu8_conv_minmax_fp32_sse2_params,
180 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
181 }
qu8_dwconv_up8x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)182 static void qu8_dwconv_up8x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
183 DWConvEnd2EndBenchmark(state, model,
184 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
185 xnn_init_qu8_conv_minmax_fp32_sse2_params,
186 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
187 }
qu8_dwconv_up16x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)188 static void qu8_dwconv_up16x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
189 DWConvEnd2EndBenchmark(state, model,
190 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
191 xnn_init_qu8_conv_minmax_fp32_sse2_params,
192 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
193 }
qu8_dwconv_up8x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)194 static void qu8_dwconv_up8x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
195 DWConvEnd2EndBenchmark(state, model,
196 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
197 xnn_init_qu8_conv_minmax_fp32_sse2_params,
198 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
199 }
qu8_dwconv_up16x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)200 static void qu8_dwconv_up16x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
201 DWConvEnd2EndBenchmark(state, model,
202 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
203 xnn_init_qu8_conv_minmax_fp32_sse2_params,
204 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
205 }
qu8_dwconv_up8x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)206 static void qu8_dwconv_up8x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
207 DWConvEnd2EndBenchmark(state, model,
208 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
209 xnn_init_qu8_conv_minmax_fp32_sse2_params,
210 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
211 }
qu8_dwconv_up16x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)212 static void qu8_dwconv_up16x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
213 DWConvEnd2EndBenchmark(state, model,
214 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
215 xnn_init_qu8_conv_minmax_fp32_sse2_params,
216 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
217 }
qu8_dwconv_up8x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)218 static void qu8_dwconv_up8x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
219 DWConvEnd2EndBenchmark(state, model,
220 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
221 xnn_init_qu8_conv_minmax_fp32_sse2_params,
222 8 /* channel tile */, 9 /* primary tile */);
223 }
qu8_dwconv_up16x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)224 static void qu8_dwconv_up16x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
225 DWConvEnd2EndBenchmark(state, model,
226 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
227 xnn_init_qu8_conv_minmax_fp32_sse2_params,
228 16 /* channel tile */, 9 /* primary tile */);
229 }
230
231 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx512skx_mul32);
232 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx512skx_mul32);
233
234 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx2_mul32);
235 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx2_mul32);
236 BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx2_mul32);
237
238 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul16);
239 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul16);
240 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul32);
241 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul32);
242
243 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul16);
244 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul16);
245 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul32);
246 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul32);
247
248 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse2_mul16);
249 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse2_mul16);
250 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
251
252
253 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)254 static void qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
255 DWConvEnd2EndBenchmark(state, model,
256 xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
257 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
258 8 /* channel tile */, 9 /* primary tile */);
259 }
qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)260 static void qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
261 DWConvEnd2EndBenchmark(state, model,
262 xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
263 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
264 16 /* channel tile */, 9 /* primary tile */);
265 }
266
267 BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__wasmsimd_mul16);
268 BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__wasmsimd_mul16);
269 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
270
271
272 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up1x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)273 static void qu8_dwconv_up1x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
274 DWConvEnd2EndBenchmark(state, model,
275 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
276 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
277 1 /* channel tile */, 9 /* primary tile */);
278 }
qu8_dwconv_up2x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)279 static void qu8_dwconv_up2x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
280 DWConvEnd2EndBenchmark(state, model,
281 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
282 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
283 2 /* channel tile */, 9 /* primary tile */);
284 }
qu8_dwconv_up4x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)285 static void qu8_dwconv_up4x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
286 DWConvEnd2EndBenchmark(state, model,
287 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
288 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
289 4 /* channel tile */, 9 /* primary tile */);
290 }
291
292 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__wasm_fmagic);
293 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__wasm_fmagic);
294 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__wasm_fmagic);
295 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
296
297
qu8_dwconv_up1x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)298 static void qu8_dwconv_up1x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
299 DWConvEnd2EndBenchmark(state, model,
300 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
301 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
302 1 /* channel tile */, 9 /* primary tile */);
303 }
qu8_dwconv_up2x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)304 static void qu8_dwconv_up2x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
305 DWConvEnd2EndBenchmark(state, model,
306 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
307 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
308 2 /* channel tile */, 9 /* primary tile */);
309 }
qu8_dwconv_up4x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)310 static void qu8_dwconv_up4x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
311 DWConvEnd2EndBenchmark(state, model,
312 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
313 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
314 4 /* channel tile */, 9 /* primary tile */);
315 }
316
qu8_dwconv_up1x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)317 static void qu8_dwconv_up1x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
318 DWConvEnd2EndBenchmark(state, model,
319 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
320 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
321 1 /* channel tile */, 9 /* primary tile */);
322 }
qu8_dwconv_up2x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)323 static void qu8_dwconv_up2x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
324 DWConvEnd2EndBenchmark(state, model,
325 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
326 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
327 2 /* channel tile */, 9 /* primary tile */);
328 }
qu8_dwconv_up4x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)329 static void qu8_dwconv_up4x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
330 DWConvEnd2EndBenchmark(state, model,
331 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
332 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
333 4 /* channel tile */, 9 /* primary tile */);
334 }
335
qu8_dwconv_up1x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)336 static void qu8_dwconv_up1x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
337 DWConvEnd2EndBenchmark(state, model,
338 xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
339 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
340 1 /* channel tile */, 9 /* primary tile */);
341 }
qu8_dwconv_up2x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)342 static void qu8_dwconv_up2x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
343 DWConvEnd2EndBenchmark(state, model,
344 xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
345 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
346 2 /* channel tile */, 9 /* primary tile */);
347 }
qu8_dwconv_up4x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)348 static void qu8_dwconv_up4x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
349 DWConvEnd2EndBenchmark(state, model,
350 xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
351 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
352 4 /* channel tile */, 9 /* primary tile */);
353 }
354
355 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_fmagic);
356 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_fmagic);
357 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_fmagic);
358
359 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_imagic);
360 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_imagic);
361 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_imagic);
362
363 BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_lrintf);
364 BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_lrintf);
365 BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_lrintf);
366
367
368 #ifndef XNNPACK_BENCHMARK_NO_MAIN
369 BENCHMARK_MAIN();
370 #endif
371