xref: /aosp_15_r20/external/XNNPACK/bench/qu8-dwconv-e2e.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <algorithm>
7*4bdc9457SAndroid Build Coastguard Worker #include <cmath>
8*4bdc9457SAndroid Build Coastguard Worker #include <functional>
9*4bdc9457SAndroid Build Coastguard Worker #include <random>
10*4bdc9457SAndroid Build Coastguard Worker #include <vector>
11*4bdc9457SAndroid Build Coastguard Worker 
12*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h>
13*4bdc9457SAndroid Build Coastguard Worker 
14*4bdc9457SAndroid Build Coastguard Worker #include <benchmark/benchmark.h>
15*4bdc9457SAndroid Build Coastguard Worker #include "bench/end2end.h"
16*4bdc9457SAndroid Build Coastguard Worker #include "bench/utils.h"
17*4bdc9457SAndroid Build Coastguard Worker #include "models/models.h"
18*4bdc9457SAndroid Build Coastguard Worker 
19*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h>
20*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/dwconv.h>
21*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microfnptr.h>
22*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microparams-init.h>
23*4bdc9457SAndroid Build Coastguard Worker 
24*4bdc9457SAndroid Build Coastguard Worker 
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,xnn_init_qu8_conv_minmax_params_fn init_params,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)25*4bdc9457SAndroid Build Coastguard Worker static void DWConvEnd2EndBenchmark(
26*4bdc9457SAndroid Build Coastguard Worker   benchmark::State& state,
27*4bdc9457SAndroid Build Coastguard Worker   models::ExecutionPlanFactory model_factory,
28*4bdc9457SAndroid Build Coastguard Worker   xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv,
29*4bdc9457SAndroid Build Coastguard Worker   xnn_init_qu8_conv_minmax_params_fn init_params,
30*4bdc9457SAndroid Build Coastguard Worker   uint8_t channel_tile, uint8_t primary_tile,
31*4bdc9457SAndroid Build Coastguard Worker   benchmark::utils::IsaCheckFunction isa_check = nullptr)
32*4bdc9457SAndroid Build Coastguard Worker {
33*4bdc9457SAndroid Build Coastguard Worker   if (isa_check && !isa_check(state)) {
34*4bdc9457SAndroid Build Coastguard Worker     return;
35*4bdc9457SAndroid Build Coastguard Worker   }
36*4bdc9457SAndroid Build Coastguard Worker   if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
37*4bdc9457SAndroid Build Coastguard Worker     state.SkipWithError("failed to initialize XNNPACK");
38*4bdc9457SAndroid Build Coastguard Worker     return;
39*4bdc9457SAndroid Build Coastguard Worker   }
40*4bdc9457SAndroid Build Coastguard Worker 
41*4bdc9457SAndroid Build Coastguard Worker   // Override microkernels chosen in xnn_initialize
42*4bdc9457SAndroid Build Coastguard Worker   for (size_t i = 0; i < XNN_MAX_QU8_DWCONV_UKERNELS; i++) {
43*4bdc9457SAndroid Build Coastguard Worker     // Replace only the microkernel the matching kernel size.
44*4bdc9457SAndroid Build Coastguard Worker     if (xnn_params.qu8.dwconv[i].primary_tile == primary_tile) {
45*4bdc9457SAndroid Build Coastguard Worker       // Note: do not directly assign to xnn_params.qu8.dwconv[i] because it breaks older gcc.
46*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
47*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[i].channel_tile = channel_tile;
48*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[i].primary_tile = primary_tile;
49*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[i].incremental_tile = 0;
50*4bdc9457SAndroid Build Coastguard Worker       xnn_params.qu8.dwconv[i].init.qu8 = init_params;
51*4bdc9457SAndroid Build Coastguard Worker       break;
52*4bdc9457SAndroid Build Coastguard Worker     }
53*4bdc9457SAndroid Build Coastguard Worker   }
54*4bdc9457SAndroid Build Coastguard Worker 
55*4bdc9457SAndroid Build Coastguard Worker   auto execution_plan = model_factory(nullptr);
56*4bdc9457SAndroid Build Coastguard Worker   if (execution_plan.empty()) {
57*4bdc9457SAndroid Build Coastguard Worker     state.SkipWithError("failed to create a model");
58*4bdc9457SAndroid Build Coastguard Worker     return;
59*4bdc9457SAndroid Build Coastguard Worker   }
60*4bdc9457SAndroid Build Coastguard Worker 
61*4bdc9457SAndroid Build Coastguard Worker   for (auto _ : state) {
62*4bdc9457SAndroid Build Coastguard Worker     for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
63*4bdc9457SAndroid Build Coastguard Worker       xnn_status status = xnn_run_operator(op.get(), nullptr);
64*4bdc9457SAndroid Build Coastguard Worker       if (status != xnn_status_success) {
65*4bdc9457SAndroid Build Coastguard Worker         state.SkipWithError("failed to run a model");
66*4bdc9457SAndroid Build Coastguard Worker         return;
67*4bdc9457SAndroid Build Coastguard Worker       }
68*4bdc9457SAndroid Build Coastguard Worker     }
69*4bdc9457SAndroid Build Coastguard Worker   }
70*4bdc9457SAndroid Build Coastguard Worker 
71*4bdc9457SAndroid Build Coastguard Worker   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
72*4bdc9457SAndroid Build Coastguard Worker   if (cpu_frequency != 0) {
73*4bdc9457SAndroid Build Coastguard Worker     state.counters["cpufreq"] = cpu_frequency;
74*4bdc9457SAndroid Build Coastguard Worker   }
75*4bdc9457SAndroid Build Coastguard Worker }
76*4bdc9457SAndroid Build Coastguard Worker 
77*4bdc9457SAndroid Build Coastguard Worker 
78*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
qu8_dwconv_up8x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)79*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
80*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
81*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8,
82*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
83*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
84*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)85*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
86*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
87*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8,
88*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
89*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
90*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up24x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)91*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up24x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
92*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
93*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8,
94*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
95*4bdc9457SAndroid Build Coastguard Worker       24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
96*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up32x9__neon_mul8(benchmark::State & state,models::ExecutionPlanFactory model)97*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up32x9__neon_mul8(benchmark::State& state, models::ExecutionPlanFactory model) {
98*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
99*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8,
100*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
101*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
102*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up8x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)103*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
104*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
105*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
106*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
107*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
108*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)109*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
110*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
111*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
112*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
113*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
114*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up24x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)115*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up24x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
116*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
117*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
118*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
119*4bdc9457SAndroid Build Coastguard Worker       24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
120*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up32x9__neon_mul16(benchmark::State & state,models::ExecutionPlanFactory model)121*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up32x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
122*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
123*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
124*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_rndnu_neon_params,
125*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
126*4bdc9457SAndroid Build Coastguard Worker   }
127*4bdc9457SAndroid Build Coastguard Worker 
128*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul8);
129*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul8);
130*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul8);
131*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul8);
132*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__neon_mul16);
133*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__neon_mul16);
134*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up24x9__neon_mul16);
135*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__neon_mul16);
136*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
137*4bdc9457SAndroid Build Coastguard Worker 
138*4bdc9457SAndroid Build Coastguard Worker 
139*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)140*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
141*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
142*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
143*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_avx512_params,
144*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
145*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)146*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up32x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
147*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
148*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
149*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_avx512_params,
150*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
151*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up8x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)152*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
153*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
154*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
155*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_avx2_params,
156*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
157*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)158*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
159*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
160*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
161*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_avx2_params,
162*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
163*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up32x9__avx2_mul32(benchmark::State & state,models::ExecutionPlanFactory model)164*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up32x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
165*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
166*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
167*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_avx2_params,
168*4bdc9457SAndroid Build Coastguard Worker       32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
169*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up8x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)170*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
171*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
172*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
173*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
174*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
175*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__avx_mul16(benchmark::State & state,models::ExecutionPlanFactory model)176*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
177*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
178*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
179*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
180*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
181*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up8x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)182*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
183*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
184*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
185*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
186*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
187*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__avx_mul32(benchmark::State & state,models::ExecutionPlanFactory model)188*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
189*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
190*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
191*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
192*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
193*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up8x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)194*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
195*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
196*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
197*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
198*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
199*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__sse41_mul16(benchmark::State & state,models::ExecutionPlanFactory model)200*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
201*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
202*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
203*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
204*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
205*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up8x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)206*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
207*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
208*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
209*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
210*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
211*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__sse41_mul32(benchmark::State & state,models::ExecutionPlanFactory model)212*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
213*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
214*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
215*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
216*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
217*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up8x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)218*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
219*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
220*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
221*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
222*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */);
223*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__sse2_mul16(benchmark::State & state,models::ExecutionPlanFactory model)224*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
225*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
226*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
227*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_sse2_params,
228*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */);
229*4bdc9457SAndroid Build Coastguard Worker   }
230*4bdc9457SAndroid Build Coastguard Worker 
231*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx512skx_mul32);
232*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx512skx_mul32);
233*4bdc9457SAndroid Build Coastguard Worker 
234*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx2_mul32);
235*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx2_mul32);
236*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up32x9__avx2_mul32);
237*4bdc9457SAndroid Build Coastguard Worker 
238*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul16);
239*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul16);
240*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__avx_mul32);
241*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__avx_mul32);
242*4bdc9457SAndroid Build Coastguard Worker 
243*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul16);
244*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul16);
245*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse41_mul32);
246*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse41_mul32);
247*4bdc9457SAndroid Build Coastguard Worker 
248*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__sse2_mul16);
249*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__sse2_mul16);
250*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
251*4bdc9457SAndroid Build Coastguard Worker 
252*4bdc9457SAndroid Build Coastguard Worker 
253*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)254*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up8x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
255*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
256*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
257*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
258*4bdc9457SAndroid Build Coastguard Worker       8 /* channel tile */, 9 /* primary tile */);
259*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State & state,models::ExecutionPlanFactory model)260*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up16x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
261*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
262*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
263*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
264*4bdc9457SAndroid Build Coastguard Worker       16 /* channel tile */, 9 /* primary tile */);
265*4bdc9457SAndroid Build Coastguard Worker   }
266*4bdc9457SAndroid Build Coastguard Worker 
267*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up8x9__wasmsimd_mul16);
268*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up16x9__wasmsimd_mul16);
269*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
270*4bdc9457SAndroid Build Coastguard Worker 
271*4bdc9457SAndroid Build Coastguard Worker 
272*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
qu8_dwconv_up1x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)273*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up1x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
274*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
275*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
276*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
277*4bdc9457SAndroid Build Coastguard Worker       1 /* channel tile */, 9 /* primary tile */);
278*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up2x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)279*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up2x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
280*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
281*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
282*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
283*4bdc9457SAndroid Build Coastguard Worker       2 /* channel tile */, 9 /* primary tile */);
284*4bdc9457SAndroid Build Coastguard Worker   }
qu8_dwconv_up4x9__wasm_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)285*4bdc9457SAndroid Build Coastguard Worker   static void qu8_dwconv_up4x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
286*4bdc9457SAndroid Build Coastguard Worker     DWConvEnd2EndBenchmark(state, model,
287*4bdc9457SAndroid Build Coastguard Worker       xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
288*4bdc9457SAndroid Build Coastguard Worker       xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
289*4bdc9457SAndroid Build Coastguard Worker       4 /* channel tile */, 9 /* primary tile */);
290*4bdc9457SAndroid Build Coastguard Worker   }
291*4bdc9457SAndroid Build Coastguard Worker 
292*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__wasm_fmagic);
293*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__wasm_fmagic);
294*4bdc9457SAndroid Build Coastguard Worker   BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__wasm_fmagic);
295*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
296*4bdc9457SAndroid Build Coastguard Worker 
297*4bdc9457SAndroid Build Coastguard Worker 
qu8_dwconv_up1x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)298*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up1x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
299*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
300*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
301*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
302*4bdc9457SAndroid Build Coastguard Worker     1 /* channel tile */, 9 /* primary tile */);
303*4bdc9457SAndroid Build Coastguard Worker }
qu8_dwconv_up2x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)304*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up2x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
305*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
306*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
307*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
308*4bdc9457SAndroid Build Coastguard Worker     2 /* channel tile */, 9 /* primary tile */);
309*4bdc9457SAndroid Build Coastguard Worker }
qu8_dwconv_up4x9__scalar_fmagic(benchmark::State & state,models::ExecutionPlanFactory model)310*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up4x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
311*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
312*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
313*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
314*4bdc9457SAndroid Build Coastguard Worker     4 /* channel tile */, 9 /* primary tile */);
315*4bdc9457SAndroid Build Coastguard Worker }
316*4bdc9457SAndroid Build Coastguard Worker 
qu8_dwconv_up1x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)317*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up1x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
318*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
319*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
320*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
321*4bdc9457SAndroid Build Coastguard Worker     1 /* channel tile */, 9 /* primary tile */);
322*4bdc9457SAndroid Build Coastguard Worker }
qu8_dwconv_up2x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)323*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up2x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
324*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
325*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
326*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
327*4bdc9457SAndroid Build Coastguard Worker     2 /* channel tile */, 9 /* primary tile */);
328*4bdc9457SAndroid Build Coastguard Worker }
qu8_dwconv_up4x9__scalar_imagic(benchmark::State & state,models::ExecutionPlanFactory model)329*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up4x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
330*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
331*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
332*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
333*4bdc9457SAndroid Build Coastguard Worker     4 /* channel tile */, 9 /* primary tile */);
334*4bdc9457SAndroid Build Coastguard Worker }
335*4bdc9457SAndroid Build Coastguard Worker 
qu8_dwconv_up1x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)336*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up1x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
337*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
338*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
339*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
340*4bdc9457SAndroid Build Coastguard Worker     1 /* channel tile */, 9 /* primary tile */);
341*4bdc9457SAndroid Build Coastguard Worker }
qu8_dwconv_up2x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)342*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up2x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
343*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
344*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
345*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
346*4bdc9457SAndroid Build Coastguard Worker     2 /* channel tile */, 9 /* primary tile */);
347*4bdc9457SAndroid Build Coastguard Worker }
qu8_dwconv_up4x9__scalar_lrintf(benchmark::State & state,models::ExecutionPlanFactory model)348*4bdc9457SAndroid Build Coastguard Worker static void qu8_dwconv_up4x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
349*4bdc9457SAndroid Build Coastguard Worker   DWConvEnd2EndBenchmark(state, model,
350*4bdc9457SAndroid Build Coastguard Worker     xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
351*4bdc9457SAndroid Build Coastguard Worker     xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
352*4bdc9457SAndroid Build Coastguard Worker     4 /* channel tile */, 9 /* primary tile */);
353*4bdc9457SAndroid Build Coastguard Worker }
354*4bdc9457SAndroid Build Coastguard Worker 
355*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_fmagic);
356*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_fmagic);
357*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_fmagic);
358*4bdc9457SAndroid Build Coastguard Worker 
359*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_imagic);
360*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_imagic);
361*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_imagic);
362*4bdc9457SAndroid Build Coastguard Worker 
363*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up1x9__scalar_lrintf);
364*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up2x9__scalar_lrintf);
365*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_QU8_END2END(qu8_dwconv_up4x9__scalar_lrintf);
366*4bdc9457SAndroid Build Coastguard Worker 
367*4bdc9457SAndroid Build Coastguard Worker 
368*4bdc9457SAndroid Build Coastguard Worker #ifndef XNNPACK_BENCHMARK_NO_MAIN
369*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_MAIN();
370*4bdc9457SAndroid Build Coastguard Worker #endif
371