1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2019 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker //
3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker
6*4bdc9457SAndroid Build Coastguard Worker #include <algorithm>
7*4bdc9457SAndroid Build Coastguard Worker #include <cfloat>
8*4bdc9457SAndroid Build Coastguard Worker #include <cmath>
9*4bdc9457SAndroid Build Coastguard Worker #include <functional>
10*4bdc9457SAndroid Build Coastguard Worker #include <limits>
11*4bdc9457SAndroid Build Coastguard Worker #include <random>
12*4bdc9457SAndroid Build Coastguard Worker #include <vector>
13*4bdc9457SAndroid Build Coastguard Worker
14*4bdc9457SAndroid Build Coastguard Worker #include <benchmark/benchmark.h>
15*4bdc9457SAndroid Build Coastguard Worker #include "bench/conv.h"
16*4bdc9457SAndroid Build Coastguard Worker #include "bench/utils.h"
17*4bdc9457SAndroid Build Coastguard Worker
18*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h>
19*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/aligned-allocator.h>
20*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
21*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/igemm.h>
22*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/indirection.h>
23*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/operator.h>
24*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microfnptr.h>
25*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microparams-init.h>
26*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/pack.h>
27*4bdc9457SAndroid Build Coastguard Worker
28*4bdc9457SAndroid Build Coastguard Worker
f32_igemm(benchmark::State & state,xnn_f32_igemm_minmax_ukernel_function igemm,uint32_t mr,uint32_t nr,uint32_t kr,uint32_t sr,xnn_init_f32_minmax_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)29*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm(benchmark::State& state,
30*4bdc9457SAndroid Build Coastguard Worker xnn_f32_igemm_minmax_ukernel_function igemm,
31*4bdc9457SAndroid Build Coastguard Worker uint32_t mr, uint32_t nr, uint32_t kr, uint32_t sr,
32*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_params_fn init_params,
33*4bdc9457SAndroid Build Coastguard Worker benchmark::utils::IsaCheckFunction isa_check = nullptr)
34*4bdc9457SAndroid Build Coastguard Worker {
35*4bdc9457SAndroid Build Coastguard Worker if (isa_check && !isa_check(state)) {
36*4bdc9457SAndroid Build Coastguard Worker return;
37*4bdc9457SAndroid Build Coastguard Worker }
38*4bdc9457SAndroid Build Coastguard Worker
39*4bdc9457SAndroid Build Coastguard Worker const size_t input_height = state.range(0);
40*4bdc9457SAndroid Build Coastguard Worker const size_t input_width = state.range(1);
41*4bdc9457SAndroid Build Coastguard Worker const size_t kernel_height = state.range(2);
42*4bdc9457SAndroid Build Coastguard Worker const size_t kernel_width = state.range(3);
43*4bdc9457SAndroid Build Coastguard Worker const size_t kernel_size = kernel_height * kernel_width;
44*4bdc9457SAndroid Build Coastguard Worker const size_t padding_height = state.range(4);
45*4bdc9457SAndroid Build Coastguard Worker const size_t padding_width = state.range(5);
46*4bdc9457SAndroid Build Coastguard Worker const size_t subsampling = state.range(6);
47*4bdc9457SAndroid Build Coastguard Worker const size_t dilation = state.range(7);
48*4bdc9457SAndroid Build Coastguard Worker const size_t group_input_channels = state.range(8);
49*4bdc9457SAndroid Build Coastguard Worker const size_t group_output_channels = state.range(9);
50*4bdc9457SAndroid Build Coastguard Worker
51*4bdc9457SAndroid Build Coastguard Worker std::random_device random_device;
52*4bdc9457SAndroid Build Coastguard Worker auto rng = std::mt19937(random_device());
53*4bdc9457SAndroid Build Coastguard Worker auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
54*4bdc9457SAndroid Build Coastguard Worker
55*4bdc9457SAndroid Build Coastguard Worker const size_t output_pixel_stride = group_output_channels;
56*4bdc9457SAndroid Build Coastguard Worker const size_t input_pixel_stride = group_input_channels;
57*4bdc9457SAndroid Build Coastguard Worker const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
58*4bdc9457SAndroid Build Coastguard Worker const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
59*4bdc9457SAndroid Build Coastguard Worker const size_t padding_left = padding_width / 2;
60*4bdc9457SAndroid Build Coastguard Worker const size_t padding_top = padding_height / 2;
61*4bdc9457SAndroid Build Coastguard Worker const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
62*4bdc9457SAndroid Build Coastguard Worker const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
63*4bdc9457SAndroid Build Coastguard Worker const size_t output_size = output_height * output_width;
64*4bdc9457SAndroid Build Coastguard Worker
65*4bdc9457SAndroid Build Coastguard Worker const size_t mc_stride = benchmark::utils::RoundUp<size_t>(output_size, mr);
66*4bdc9457SAndroid Build Coastguard Worker const size_t nc_stride = benchmark::utils::RoundUp<size_t>(group_output_channels, nr);
67*4bdc9457SAndroid Build Coastguard Worker const size_t kc_stride = benchmark::utils::RoundUp<size_t>(group_input_channels, kr * sr);
68*4bdc9457SAndroid Build Coastguard Worker
69*4bdc9457SAndroid Build Coastguard Worker std::vector<float> a(input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(float));
70*4bdc9457SAndroid Build Coastguard Worker std::generate(a.begin(), a.end(), std::ref(f32rng));
71*4bdc9457SAndroid Build Coastguard Worker std::vector<float> k(group_output_channels * kernel_height * kernel_width * group_input_channels);
72*4bdc9457SAndroid Build Coastguard Worker std::generate(k.begin(), k.end(), std::ref(f32rng));
73*4bdc9457SAndroid Build Coastguard Worker std::vector<float> b(group_output_channels);
74*4bdc9457SAndroid Build Coastguard Worker std::generate(b.begin(), b.end(), std::ref(f32rng));
75*4bdc9457SAndroid Build Coastguard Worker
76*4bdc9457SAndroid Build Coastguard Worker std::vector<float> z(group_input_channels + XNN_EXTRA_BYTES / sizeof(float));
77*4bdc9457SAndroid Build Coastguard Worker
78*4bdc9457SAndroid Build Coastguard Worker const size_t w_elements = kernel_size * kc_stride * nc_stride + nc_stride;
79*4bdc9457SAndroid Build Coastguard Worker const size_t i_elements = mc_stride * kernel_size;
80*4bdc9457SAndroid Build Coastguard Worker const size_t c_elements = output_height * output_width * output_pixel_stride;
81*4bdc9457SAndroid Build Coastguard Worker const size_t num_buffers = 1 +
82*4bdc9457SAndroid Build Coastguard Worker benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
83*4bdc9457SAndroid Build Coastguard Worker sizeof(float) * (w_elements + c_elements) + sizeof(void*) * i_elements);
84*4bdc9457SAndroid Build Coastguard Worker
85*4bdc9457SAndroid Build Coastguard Worker std::vector<float, AlignedAllocator<float, 64>> w(w_elements * num_buffers);
86*4bdc9457SAndroid Build Coastguard Worker std::fill(w.begin(), w.end(), 0.0f);
87*4bdc9457SAndroid Build Coastguard Worker xnn_pack_f32_conv_goki_w(
88*4bdc9457SAndroid Build Coastguard Worker 1 /* groups */, group_output_channels, kernel_size, group_input_channels,
89*4bdc9457SAndroid Build Coastguard Worker nr, kr, sr, k.data(), b.data(), w.data(), 0 /* extra bytes */, nullptr);
90*4bdc9457SAndroid Build Coastguard Worker for (size_t n = 1; n < num_buffers; n++) {
91*4bdc9457SAndroid Build Coastguard Worker std::copy(w.cbegin(), w.cbegin() + w_elements, w.begin() + n * w_elements);
92*4bdc9457SAndroid Build Coastguard Worker }
93*4bdc9457SAndroid Build Coastguard Worker
94*4bdc9457SAndroid Build Coastguard Worker std::vector<const float*> i(i_elements * num_buffers);
95*4bdc9457SAndroid Build Coastguard Worker xnn_operator convolution_op = { };
96*4bdc9457SAndroid Build Coastguard Worker convolution_op.indirection_buffer = reinterpret_cast<const void**>(i.data());
97*4bdc9457SAndroid Build Coastguard Worker convolution_op.input = a.data();
98*4bdc9457SAndroid Build Coastguard Worker convolution_op.input_pixel_stride = input_pixel_stride;
99*4bdc9457SAndroid Build Coastguard Worker convolution_op.zero_buffer = z.data();
100*4bdc9457SAndroid Build Coastguard Worker convolution_op.groups = 1;
101*4bdc9457SAndroid Build Coastguard Worker convolution_op.group_input_channels = group_input_channels;
102*4bdc9457SAndroid Build Coastguard Worker convolution_op.batch_size = 1;
103*4bdc9457SAndroid Build Coastguard Worker convolution_op.input_height = input_height;
104*4bdc9457SAndroid Build Coastguard Worker convolution_op.input_width = input_width;
105*4bdc9457SAndroid Build Coastguard Worker convolution_op.output_height = output_height;
106*4bdc9457SAndroid Build Coastguard Worker convolution_op.output_width = output_width;
107*4bdc9457SAndroid Build Coastguard Worker convolution_op.kernel_height = kernel_height;
108*4bdc9457SAndroid Build Coastguard Worker convolution_op.kernel_width = kernel_width;
109*4bdc9457SAndroid Build Coastguard Worker convolution_op.stride_height = subsampling;
110*4bdc9457SAndroid Build Coastguard Worker convolution_op.stride_width = subsampling;
111*4bdc9457SAndroid Build Coastguard Worker convolution_op.dilation_height = dilation;
112*4bdc9457SAndroid Build Coastguard Worker convolution_op.dilation_width = dilation;
113*4bdc9457SAndroid Build Coastguard Worker convolution_op.padding_top = padding_top;
114*4bdc9457SAndroid Build Coastguard Worker convolution_op.padding_left = padding_left;
115*4bdc9457SAndroid Build Coastguard Worker xnn_indirection_init_conv2d(&convolution_op, mr, 2 /* log2(sizeof(float)) */);
116*4bdc9457SAndroid Build Coastguard Worker for (size_t n = 1; n < num_buffers; n++) {
117*4bdc9457SAndroid Build Coastguard Worker std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
118*4bdc9457SAndroid Build Coastguard Worker }
119*4bdc9457SAndroid Build Coastguard Worker
120*4bdc9457SAndroid Build Coastguard Worker std::vector<float> c(c_elements * num_buffers);
121*4bdc9457SAndroid Build Coastguard Worker std::fill(c.begin(), c.end(), std::nanf(""));
122*4bdc9457SAndroid Build Coastguard Worker
123*4bdc9457SAndroid Build Coastguard Worker xnn_f32_minmax_params params;
124*4bdc9457SAndroid Build Coastguard Worker init_params(¶ms,
125*4bdc9457SAndroid Build Coastguard Worker -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
126*4bdc9457SAndroid Build Coastguard Worker
127*4bdc9457SAndroid Build Coastguard Worker size_t buffer_index = 0;
128*4bdc9457SAndroid Build Coastguard Worker for (auto _ : state) {
129*4bdc9457SAndroid Build Coastguard Worker state.PauseTiming();
130*4bdc9457SAndroid Build Coastguard Worker benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
131*4bdc9457SAndroid Build Coastguard Worker buffer_index = (buffer_index + 1) % num_buffers;
132*4bdc9457SAndroid Build Coastguard Worker state.ResumeTiming();
133*4bdc9457SAndroid Build Coastguard Worker
134*4bdc9457SAndroid Build Coastguard Worker for (uint32_t m = 0; m < output_size; m += mr) {
135*4bdc9457SAndroid Build Coastguard Worker const uint32_t mb = min(output_size - m, mr);
136*4bdc9457SAndroid Build Coastguard Worker igemm(
137*4bdc9457SAndroid Build Coastguard Worker mb, group_output_channels, group_input_channels * sizeof(float), kernel_size * mr * sizeof(void*),
138*4bdc9457SAndroid Build Coastguard Worker i.data() + buffer_index * i_elements + m,
139*4bdc9457SAndroid Build Coastguard Worker w.data() + buffer_index * w_elements,
140*4bdc9457SAndroid Build Coastguard Worker c.data() + buffer_index * c_elements + m * group_output_channels, group_output_channels * sizeof(float), nr * sizeof(float),
141*4bdc9457SAndroid Build Coastguard Worker 0, z.data(), ¶ms);
142*4bdc9457SAndroid Build Coastguard Worker }
143*4bdc9457SAndroid Build Coastguard Worker }
144*4bdc9457SAndroid Build Coastguard Worker
145*4bdc9457SAndroid Build Coastguard Worker const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
146*4bdc9457SAndroid Build Coastguard Worker if (cpu_frequency != 0) {
147*4bdc9457SAndroid Build Coastguard Worker state.counters["cpufreq"] = cpu_frequency;
148*4bdc9457SAndroid Build Coastguard Worker }
149*4bdc9457SAndroid Build Coastguard Worker
150*4bdc9457SAndroid Build Coastguard Worker state.counters["FLOPS"] = benchmark::Counter(
151*4bdc9457SAndroid Build Coastguard Worker uint64_t(state.iterations()) * 2 *
152*4bdc9457SAndroid Build Coastguard Worker output_height * output_width *
153*4bdc9457SAndroid Build Coastguard Worker group_input_channels * group_output_channels *
154*4bdc9457SAndroid Build Coastguard Worker kernel_height * kernel_width,
155*4bdc9457SAndroid Build Coastguard Worker benchmark::Counter::kIsRate);
156*4bdc9457SAndroid Build Coastguard Worker }
157*4bdc9457SAndroid Build Coastguard Worker
158*4bdc9457SAndroid Build Coastguard Worker #if XNN_PLATFORM_JIT
f32_igemm(benchmark::State & state,xnn_jit_igemm_code_generator_function generator,size_t mr,size_t nr,size_t kr,size_t sr,xnn_init_f32_minmax_params_fn init_params,benchmark::utils::IsaCheckFunction isa_check=nullptr)159*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm(benchmark::State& state,
160*4bdc9457SAndroid Build Coastguard Worker xnn_jit_igemm_code_generator_function generator,
161*4bdc9457SAndroid Build Coastguard Worker size_t mr, size_t nr, size_t kr, size_t sr,
162*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_params_fn init_params,
163*4bdc9457SAndroid Build Coastguard Worker benchmark::utils::IsaCheckFunction isa_check = nullptr)
164*4bdc9457SAndroid Build Coastguard Worker {
165*4bdc9457SAndroid Build Coastguard Worker if (isa_check && !isa_check(state)) {
166*4bdc9457SAndroid Build Coastguard Worker return;
167*4bdc9457SAndroid Build Coastguard Worker }
168*4bdc9457SAndroid Build Coastguard Worker
169*4bdc9457SAndroid Build Coastguard Worker const size_t input_height = state.range(0);
170*4bdc9457SAndroid Build Coastguard Worker const size_t input_width = state.range(1);
171*4bdc9457SAndroid Build Coastguard Worker const size_t kernel_height = state.range(2);
172*4bdc9457SAndroid Build Coastguard Worker const size_t kernel_width = state.range(3);
173*4bdc9457SAndroid Build Coastguard Worker const size_t kernel_size = kernel_height * kernel_width;
174*4bdc9457SAndroid Build Coastguard Worker const size_t padding_height = state.range(4);
175*4bdc9457SAndroid Build Coastguard Worker const size_t padding_width = state.range(5);
176*4bdc9457SAndroid Build Coastguard Worker const size_t subsampling = state.range(6);
177*4bdc9457SAndroid Build Coastguard Worker const size_t dilation = state.range(7);
178*4bdc9457SAndroid Build Coastguard Worker const size_t group_input_channels = state.range(8);
179*4bdc9457SAndroid Build Coastguard Worker const size_t group_output_channels = state.range(9);
180*4bdc9457SAndroid Build Coastguard Worker
181*4bdc9457SAndroid Build Coastguard Worker std::random_device random_device;
182*4bdc9457SAndroid Build Coastguard Worker auto rng = std::mt19937(random_device());
183*4bdc9457SAndroid Build Coastguard Worker auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
184*4bdc9457SAndroid Build Coastguard Worker
185*4bdc9457SAndroid Build Coastguard Worker const size_t output_pixel_stride = group_output_channels;
186*4bdc9457SAndroid Build Coastguard Worker const size_t input_pixel_stride = group_input_channels;
187*4bdc9457SAndroid Build Coastguard Worker const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
188*4bdc9457SAndroid Build Coastguard Worker const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
189*4bdc9457SAndroid Build Coastguard Worker const size_t padding_left = padding_width / 2;
190*4bdc9457SAndroid Build Coastguard Worker const size_t padding_top = padding_height / 2;
191*4bdc9457SAndroid Build Coastguard Worker const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
192*4bdc9457SAndroid Build Coastguard Worker const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
193*4bdc9457SAndroid Build Coastguard Worker const size_t output_size = output_height * output_width;
194*4bdc9457SAndroid Build Coastguard Worker
195*4bdc9457SAndroid Build Coastguard Worker const size_t mc_stride = benchmark::utils::RoundUp<size_t>(output_size, mr);
196*4bdc9457SAndroid Build Coastguard Worker const size_t nc_stride = benchmark::utils::RoundUp<size_t>(group_output_channels, nr);
197*4bdc9457SAndroid Build Coastguard Worker const size_t kc_stride = benchmark::utils::RoundUp<size_t>(group_input_channels, kr * sr);
198*4bdc9457SAndroid Build Coastguard Worker
199*4bdc9457SAndroid Build Coastguard Worker std::vector<float> a(input_height * input_width * input_pixel_stride + XNN_EXTRA_BYTES / sizeof(float));
200*4bdc9457SAndroid Build Coastguard Worker std::generate(a.begin(), a.end(), std::ref(f32rng));
201*4bdc9457SAndroid Build Coastguard Worker std::vector<float> k(group_output_channels * kernel_height * kernel_width * group_input_channels);
202*4bdc9457SAndroid Build Coastguard Worker std::generate(k.begin(), k.end(), std::ref(f32rng));
203*4bdc9457SAndroid Build Coastguard Worker std::vector<float> b(group_output_channels);
204*4bdc9457SAndroid Build Coastguard Worker std::generate(b.begin(), b.end(), std::ref(f32rng));
205*4bdc9457SAndroid Build Coastguard Worker
206*4bdc9457SAndroid Build Coastguard Worker std::vector<float> z(group_input_channels + XNN_EXTRA_BYTES / sizeof(float));
207*4bdc9457SAndroid Build Coastguard Worker
208*4bdc9457SAndroid Build Coastguard Worker const size_t w_elements = kernel_size * kc_stride * nc_stride + nc_stride;
209*4bdc9457SAndroid Build Coastguard Worker const size_t i_elements = mc_stride * kernel_size;
210*4bdc9457SAndroid Build Coastguard Worker const size_t c_elements = output_height * output_width * output_pixel_stride;
211*4bdc9457SAndroid Build Coastguard Worker const size_t num_buffers = 1 +
212*4bdc9457SAndroid Build Coastguard Worker benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
213*4bdc9457SAndroid Build Coastguard Worker sizeof(float) * (w_elements + c_elements) + sizeof(void*) * i_elements);
214*4bdc9457SAndroid Build Coastguard Worker
215*4bdc9457SAndroid Build Coastguard Worker std::vector<float, AlignedAllocator<float, 64>> w(w_elements * num_buffers);
216*4bdc9457SAndroid Build Coastguard Worker std::fill(w.begin(), w.end(), 0.0f);
217*4bdc9457SAndroid Build Coastguard Worker xnn_pack_f32_conv_goki_w(
218*4bdc9457SAndroid Build Coastguard Worker 1 /* groups */, group_output_channels, kernel_size, group_input_channels,
219*4bdc9457SAndroid Build Coastguard Worker nr, kr, sr, k.data(), b.data(), w.data(), 0 /* extra bytes */, nullptr);
220*4bdc9457SAndroid Build Coastguard Worker for (size_t n = 1; n < num_buffers; n++) {
221*4bdc9457SAndroid Build Coastguard Worker std::copy(w.cbegin(), w.cbegin() + w_elements, w.begin() + n * w_elements);
222*4bdc9457SAndroid Build Coastguard Worker }
223*4bdc9457SAndroid Build Coastguard Worker
224*4bdc9457SAndroid Build Coastguard Worker std::vector<const float*> i(i_elements * num_buffers);
225*4bdc9457SAndroid Build Coastguard Worker xnn_operator convolution_op = { };
226*4bdc9457SAndroid Build Coastguard Worker convolution_op.indirection_buffer = reinterpret_cast<const void**>(i.data());
227*4bdc9457SAndroid Build Coastguard Worker convolution_op.input = a.data();
228*4bdc9457SAndroid Build Coastguard Worker convolution_op.input_pixel_stride = input_pixel_stride;
229*4bdc9457SAndroid Build Coastguard Worker convolution_op.zero_buffer = z.data();
230*4bdc9457SAndroid Build Coastguard Worker convolution_op.groups = 1;
231*4bdc9457SAndroid Build Coastguard Worker convolution_op.group_input_channels = group_input_channels;
232*4bdc9457SAndroid Build Coastguard Worker convolution_op.batch_size = 1;
233*4bdc9457SAndroid Build Coastguard Worker convolution_op.input_height = input_height;
234*4bdc9457SAndroid Build Coastguard Worker convolution_op.input_width = input_width;
235*4bdc9457SAndroid Build Coastguard Worker convolution_op.output_height = output_height;
236*4bdc9457SAndroid Build Coastguard Worker convolution_op.output_width = output_width;
237*4bdc9457SAndroid Build Coastguard Worker convolution_op.kernel_height = kernel_height;
238*4bdc9457SAndroid Build Coastguard Worker convolution_op.kernel_width = kernel_width;
239*4bdc9457SAndroid Build Coastguard Worker convolution_op.stride_height = subsampling;
240*4bdc9457SAndroid Build Coastguard Worker convolution_op.stride_width = subsampling;
241*4bdc9457SAndroid Build Coastguard Worker convolution_op.dilation_height = dilation;
242*4bdc9457SAndroid Build Coastguard Worker convolution_op.dilation_width = dilation;
243*4bdc9457SAndroid Build Coastguard Worker convolution_op.padding_top = padding_top;
244*4bdc9457SAndroid Build Coastguard Worker convolution_op.padding_left = padding_left;
245*4bdc9457SAndroid Build Coastguard Worker xnn_indirection_init_conv2d(&convolution_op, mr, 2 /* log2(sizeof(float)) */);
246*4bdc9457SAndroid Build Coastguard Worker for (size_t n = 1; n < num_buffers; n++) {
247*4bdc9457SAndroid Build Coastguard Worker std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
248*4bdc9457SAndroid Build Coastguard Worker }
249*4bdc9457SAndroid Build Coastguard Worker
250*4bdc9457SAndroid Build Coastguard Worker std::vector<float> c(c_elements * num_buffers);
251*4bdc9457SAndroid Build Coastguard Worker std::fill(c.begin(), c.end(), std::nanf(""));
252*4bdc9457SAndroid Build Coastguard Worker
253*4bdc9457SAndroid Build Coastguard Worker xnn_f32_minmax_params params;
254*4bdc9457SAndroid Build Coastguard Worker init_params(¶ms,
255*4bdc9457SAndroid Build Coastguard Worker -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
256*4bdc9457SAndroid Build Coastguard Worker
257*4bdc9457SAndroid Build Coastguard Worker jit_gemm_params jit_params = {
258*4bdc9457SAndroid Build Coastguard Worker .f32_minmax = {
259*4bdc9457SAndroid Build Coastguard Worker .min = -std::numeric_limits<float>::infinity(),
260*4bdc9457SAndroid Build Coastguard Worker .max = +std::numeric_limits<float>::infinity()
261*4bdc9457SAndroid Build Coastguard Worker }
262*4bdc9457SAndroid Build Coastguard Worker };
263*4bdc9457SAndroid Build Coastguard Worker
264*4bdc9457SAndroid Build Coastguard Worker xnn_initialize(/*allocator=*/nullptr);
265*4bdc9457SAndroid Build Coastguard Worker xnn_code_buffer code_buffer;
266*4bdc9457SAndroid Build Coastguard Worker xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE);
267*4bdc9457SAndroid Build Coastguard Worker generator(&code_buffer,
268*4bdc9457SAndroid Build Coastguard Worker mr,
269*4bdc9457SAndroid Build Coastguard Worker group_output_channels % nr,
270*4bdc9457SAndroid Build Coastguard Worker group_input_channels * sizeof(float),
271*4bdc9457SAndroid Build Coastguard Worker kernel_size * mr * sizeof(void *),
272*4bdc9457SAndroid Build Coastguard Worker &jit_params);
273*4bdc9457SAndroid Build Coastguard Worker xnn_finalize_code_memory(&code_buffer);
274*4bdc9457SAndroid Build Coastguard Worker auto igemm = reinterpret_cast<xnn_f32_igemm_minmax_ukernel_function>(code_buffer.start);
275*4bdc9457SAndroid Build Coastguard Worker
276*4bdc9457SAndroid Build Coastguard Worker size_t buffer_index = 0;
277*4bdc9457SAndroid Build Coastguard Worker for (auto _ : state) {
278*4bdc9457SAndroid Build Coastguard Worker state.PauseTiming();
279*4bdc9457SAndroid Build Coastguard Worker benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
280*4bdc9457SAndroid Build Coastguard Worker buffer_index = (buffer_index + 1) % num_buffers;
281*4bdc9457SAndroid Build Coastguard Worker state.ResumeTiming();
282*4bdc9457SAndroid Build Coastguard Worker
283*4bdc9457SAndroid Build Coastguard Worker for (uint32_t m = 0; m < output_size; m += mr) {
284*4bdc9457SAndroid Build Coastguard Worker const uint32_t mb = min(output_size - m, mr);
285*4bdc9457SAndroid Build Coastguard Worker igemm(
286*4bdc9457SAndroid Build Coastguard Worker mb, group_output_channels, group_input_channels * sizeof(float), kernel_size * mr * sizeof(void*),
287*4bdc9457SAndroid Build Coastguard Worker i.data() + buffer_index * i_elements + m,
288*4bdc9457SAndroid Build Coastguard Worker w.data() + buffer_index * w_elements,
289*4bdc9457SAndroid Build Coastguard Worker c.data() + buffer_index * c_elements + m * group_output_channels, group_output_channels * sizeof(float), nr * sizeof(float),
290*4bdc9457SAndroid Build Coastguard Worker 0, z.data(), ¶ms);
291*4bdc9457SAndroid Build Coastguard Worker }
292*4bdc9457SAndroid Build Coastguard Worker }
293*4bdc9457SAndroid Build Coastguard Worker xnn_release_code_memory(&code_buffer);
294*4bdc9457SAndroid Build Coastguard Worker
295*4bdc9457SAndroid Build Coastguard Worker const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
296*4bdc9457SAndroid Build Coastguard Worker if (cpu_frequency != 0) {
297*4bdc9457SAndroid Build Coastguard Worker state.counters["cpufreq"] = cpu_frequency;
298*4bdc9457SAndroid Build Coastguard Worker }
299*4bdc9457SAndroid Build Coastguard Worker
300*4bdc9457SAndroid Build Coastguard Worker state.counters["FLOPS"] = benchmark::Counter(
301*4bdc9457SAndroid Build Coastguard Worker uint64_t(state.iterations()) * 2 *
302*4bdc9457SAndroid Build Coastguard Worker output_height * output_width *
303*4bdc9457SAndroid Build Coastguard Worker group_input_channels * group_output_channels *
304*4bdc9457SAndroid Build Coastguard Worker kernel_height * kernel_width,
305*4bdc9457SAndroid Build Coastguard Worker benchmark::Counter::kIsRate);
306*4bdc9457SAndroid Build Coastguard Worker
307*4bdc9457SAndroid Build Coastguard Worker }
308*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_PLATFORM_JIT
309*4bdc9457SAndroid Build Coastguard Worker
310*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
jit_f32_igemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)311*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
312*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, 1, 8, 1, 1,
313*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
314*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)315*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
316*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, 1, 8, 1, 1,
317*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
318*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)319*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
320*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75, 4, 8, 1, 1,
321*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
322*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)323*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
324*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, 4, 8, 1, 1,
325*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
326*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_6x8__aarch64_neonfma_ld128(benchmark::State & state,const char * net)327*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_6x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) {
328*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_ld128, 6, 8, 1, 1,
329*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
330*4bdc9457SAndroid Build Coastguard Worker }
331*4bdc9457SAndroid Build Coastguard Worker
332*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_1x8__aarch64_neonfma_cortex_a75)
333*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75)
334*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch64_neonfma_cortex_a75)
335*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75)
336*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_6x8__aarch64_neonfma_ld128)
337*4bdc9457SAndroid Build Coastguard Worker
338*4bdc9457SAndroid Build Coastguard Worker #define BENCHMARK_UPTO_MR_IGEMM(name, max_mr, nr) \
339*4bdc9457SAndroid Build Coastguard Worker static void name(benchmark::State &state, const char *net) { \
340*4bdc9457SAndroid Build Coastguard Worker f32_igemm( \
341*4bdc9457SAndroid Build Coastguard Worker state, \
342*4bdc9457SAndroid Build Coastguard Worker xnn_generate_f32_igemm_ukernel_upto6x8__aarch64_neonfma_prfm_cortex_a75, \
343*4bdc9457SAndroid Build Coastguard Worker max_mr, nr, 1, 1, xnn_init_f32_minmax_scalar_params); \
344*4bdc9457SAndroid Build Coastguard Worker } \
345*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(name)
346*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_UPTO_MR_IGEMM(jit_f32_igemm_upto6x8_1x8__aarch64_neonfma_prfm_cortex_a75, 1, 8);
347*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_UPTO_MR_IGEMM(jit_f32_igemm_upto6x8_2x8__aarch64_neonfma_prfm_cortex_a75, 2, 8);
348*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_UPTO_MR_IGEMM(jit_f32_igemm_upto6x8_3x8__aarch64_neonfma_prfm_cortex_a75, 3, 8);
349*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_UPTO_MR_IGEMM(jit_f32_igemm_upto6x8_4x8__aarch64_neonfma_prfm_cortex_a75, 4, 8);
350*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_UPTO_MR_IGEMM(jit_f32_igemm_upto6x8_5x8__aarch64_neonfma_prfm_cortex_a75, 5, 8);
351*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_UPTO_MR_IGEMM(jit_f32_igemm_upto6x8_6x8__aarch64_neonfma_prfm_cortex_a75, 6, 8);
352*4bdc9457SAndroid Build Coastguard Worker #undef BENCHMARK_UPTO_MR_IGEMM
353*4bdc9457SAndroid Build Coastguard Worker
354*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
355*4bdc9457SAndroid Build Coastguard Worker
356*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM && XNN_PLATFORM_JIT
jit_f32_igemm_4x8__aarch32_neon_ld64(benchmark::State & state,const char * net)357*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch32_neon_ld64(benchmark::State& state, const char* net) {
358*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, 4, 8, 1, 1,
359*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
360*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_4x8__aarch32_neon_cortex_a7(benchmark::State & state,const char * net)361*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, const char* net) {
362*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, 4, 8, 1, 1,
363*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
364*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_4x8__aarch32_neon_cortex_a53(benchmark::State & state,const char * net)365*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, const char* net) {
366*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, 4, 8, 1, 1,
367*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
368*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_4x8__aarch32_neon_cortex_a55(benchmark::State & state,const char * net)369*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, const char* net) {
370*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, 4, 8, 1, 1,
371*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
372*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State & state,const char * net)373*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State& state, const char* net) {
374*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, 4, 8, 1, 1,
375*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
376*4bdc9457SAndroid Build Coastguard Worker }
jit_f32_igemm_4x8__aarch32_neon_cortex_a75(benchmark::State & state,const char * net)377*4bdc9457SAndroid Build Coastguard Worker static void jit_f32_igemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, const char* net) {
378*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, 4, 8, 1, 1,
379*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
380*4bdc9457SAndroid Build Coastguard Worker }
381*4bdc9457SAndroid Build Coastguard Worker
// Register the six JIT AArch32 4x8 wrappers defined in this #if section.
// NOTE(review): BENCHMARK_CONV is defined outside this view (presumably in
// bench/conv.h, which this file includes) - confirm what it expands to.
382*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch32_neon_ld64)
BENCHMARK_CONV(jit_f32_igemm_4x8__aarch32_neon_cortex_a7)383*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch32_neon_cortex_a7)
384*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch32_neon_cortex_a53)
385*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch32_neon_cortex_a55)
386*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch32_neon_prfm_cortex_a75)
387*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(jit_f32_igemm_4x8__aarch32_neon_cortex_a75)
388*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM && XNN_PLATFORM_JIT
389*4bdc9457SAndroid Build Coastguard Worker
390*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
391*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch32_neon_ld64(benchmark::State& state, const char* net) {
392*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, 4, 8, 1, 1,
393*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
394*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch32_neon_cortex_a7(benchmark::State & state,const char * net)395*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, const char* net) {
396*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, 4, 8, 1, 1,
397*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
398*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch32_neon_cortex_a53(benchmark::State & state,const char * net)399*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, const char* net) {
400*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, 4, 8, 1, 1,
401*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
402*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch32_neon_prfm_cortex_a53(benchmark::State & state,const char * net)403*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch32_neon_prfm_cortex_a53(benchmark::State& state, const char* net) {
404*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a53, 4, 8, 1, 1,
405*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
406*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch32_neon_cortex_a55(benchmark::State & state,const char * net)407*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, const char* net) {
408*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, 4, 8, 1, 1,
409*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
410*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State & state,const char * net)411*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch32_neon_prfm_cortex_a75(benchmark::State& state, const char* net) {
412*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, 4, 8, 1, 1,
413*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
414*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch32_neon_cortex_a75(benchmark::State & state,const char * net)415*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, const char* net) {
416*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, 4, 8, 1, 1,
417*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
418*4bdc9457SAndroid Build Coastguard Worker }
419*4bdc9457SAndroid Build Coastguard Worker
// Register the seven AArch32 assembly 4x8 wrappers defined in this #if
// section.
// NOTE(review): BENCHMARK_CONV is defined outside this view (presumably in
// bench/conv.h, which this file includes) - confirm what it expands to.
420*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_ld64)
BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a7)421*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a7)
422*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a53)
423*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_prfm_cortex_a53)
424*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a55)
425*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_prfm_cortex_a75)
426*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a75)
427*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
428*4bdc9457SAndroid Build Coastguard Worker
429*4bdc9457SAndroid Build Coastguard Worker
430*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
431*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
432*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, 1, 12, 1, 1,
433*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
434*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)435*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
436*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, 1, 8, 1, 1,
437*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
438*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State & state,const char * net)439*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State& state, const char* net) {
440*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a53, 1, 8, 1, 1,
441*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
442*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)443*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
444*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, 1, 8, 1, 1,
445*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
446*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)447*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
448*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, 1, 8, 1, 1,
449*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
450*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x2__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)451*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x2__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
452*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_cortex_a75, 4, 2, 1, 1,
453*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
454*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x2__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)455*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x2__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
456*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_prfm_cortex_a75, 4, 2, 1, 1,
457*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
458*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x2__aarch64_neonfma_ld64(benchmark::State & state,const char * net)459*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x2__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
460*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x2__aarch64_neonfma_ld64, 4, 2, 1, 1,
461*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
462*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)463*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
464*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, 4, 8, 1, 1,
465*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
466*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State & state,const char * net)467*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State& state, const char* net) {
468*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a53, 4, 8, 1, 1,
469*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
470*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State & state,const char * net)471*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) {
472*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, 4, 8, 1, 1,
473*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
474*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)475*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
476*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, 4, 8, 1, 1,
477*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
478*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)479*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
480*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, 4, 8, 1, 1,
481*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
482*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch64_neonfma_ld64(benchmark::State & state,const char * net)483*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
484*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, 4, 8, 1, 1,
485*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
486*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__aarch64_neonfma_ld128(benchmark::State & state,const char * net)487*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) {
488*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, 4, 8, 1, 1,
489*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
490*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)491*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
492*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, 5, 8, 1, 1,
493*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
494*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)495*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
496*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, 5, 8, 1, 1,
497*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
498*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)499*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
500*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, 4, 12, 1, 1,
501*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
502*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State & state,const char * net)503*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
504*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, 6, 8, 1, 1,
505*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
506*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State & state,const char * net)507*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a53(benchmark::State& state, const char* net) {
508*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a53, 6, 8, 1, 1,
509*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
510*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State & state,const char * net)511*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) {
512*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, 6, 8, 1, 1,
513*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
514*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State & state,const char * net)515*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State& state, const char* net) {
516*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, 6, 8, 1, 1,
517*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
518*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State & state,const char * net)519*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
520*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, 6, 8, 1, 1,
521*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
522*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State & state,const char * net)523*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
524*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, 6, 8, 1, 1,
525*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
526*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_ld64(benchmark::State & state,const char * net)527*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
528*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, 6, 8, 1, 1,
529*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
530*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__aarch64_neonfma_ld128(benchmark::State & state,const char * net)531*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) {
532*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, 6, 8, 1, 1,
533*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
534*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8__neonfma_lane_ld64(benchmark::State & state,const char * net)535*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
536*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, 1, 8, 1, 1,
537*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
538*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x2__neonfma_lane_ld64(benchmark::State & state,const char * net)539*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x2__neonfma_lane_ld64(benchmark::State& state, const char* net) {
540*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, 4, 2, 1, 1,
541*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
542*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x2__neonfma_lane_ld64(benchmark::State & state,const char * net)543*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x2__neonfma_lane_ld64(benchmark::State& state, const char* net) {
544*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64, 6, 2, 1, 1,
545*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
546*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x4__neonfma_lane_ld64(benchmark::State & state,const char * net)547*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x4__neonfma_lane_ld64(benchmark::State& state, const char* net) {
548*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, 4, 4, 1, 1,
549*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
550*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neonfma_lane_ld128(benchmark::State & state,const char * net)551*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neonfma_lane_ld128(benchmark::State& state, const char* net) {
552*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, 4, 8, 1, 1,
553*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
554*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neonfma_lane_ld64(benchmark::State & state,const char * net)555*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
556*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, 4, 8, 1, 1,
557*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
558*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neonfma_lane_ld64(benchmark::State & state,const char * net)559*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
560*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, 6, 8, 1, 1,
561*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
562*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neonfma_lane_ld128(benchmark::State & state,const char * net)563*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neonfma_lane_ld128(benchmark::State& state, const char* net) {
564*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, 6, 8, 1, 1,
565*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
566*4bdc9457SAndroid Build Coastguard Worker }
567*4bdc9457SAndroid Build Coastguard Worker
// Register every wrapper defined in this #if section. The registration order
// differs from the definition order: the 1x12 wrapper is defined first but
// registered after the 1x8 ones.
// NOTE(review): BENCHMARK_CONV is defined outside this view (presumably in
// bench/conv.h, which this file includes) - confirm what it expands to.
568*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_cortex_a53)
BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a53)569*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a53)
570*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_cortex_a75)
571*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75)
572*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x12__aarch64_neonfma_cortex_a53)
573*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x2__aarch64_neonfma_cortex_a75)
574*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x2__aarch64_neonfma_prfm_cortex_a75)
575*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x2__aarch64_neonfma_ld64)
576*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_cortex_a53)
577*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a53)
578*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_cortex_a55)
579*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_cortex_a75)
580*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75)
581*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_ld64)
582*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_ld128)
583*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x12__aarch64_neonfma_cortex_a53)
584*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__aarch64_neonfma_cortex_a75)
585*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__aarch64_neonfma_prfm_cortex_a75)
586*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a53)
587*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a53)
588*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a55)
589*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a73)
590*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a75)
591*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a75)
592*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_ld64)
593*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_ld128)
594*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__neonfma_lane_ld64)
595*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x2__neonfma_lane_ld64)
596*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x2__neonfma_lane_ld64)
597*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x4__neonfma_lane_ld64)
598*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neonfma_lane_ld128)
599*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neonfma_lane_ld64)
600*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neonfma_lane_ld64)
601*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neonfma_lane_ld128)
602*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
603*4bdc9457SAndroid Build Coastguard Worker
604*4bdc9457SAndroid Build Coastguard Worker
605*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
606*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__neon_lane_ld64(benchmark::State& state, const char* net) {
607*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, 1, 8, 1, 1,
608*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
609*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x2__neon_lane_ld64(benchmark::State & state,const char * net)610*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x2__neon_lane_ld64(benchmark::State& state, const char* net) {
611*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, 4, 2, 1, 1,
612*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
613*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x2__neon_lane_ld64(benchmark::State & state,const char * net)614*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x2__neon_lane_ld64(benchmark::State& state, const char* net) {
615*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64, 6, 2, 1, 1,
616*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
617*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x4__neon_lane_ld64(benchmark::State & state,const char * net)618*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x4__neon_lane_ld64(benchmark::State& state, const char* net) {
619*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, 4, 4, 1, 1,
620*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
621*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neon_lane_ld64(benchmark::State & state,const char * net)622*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neon_lane_ld64(benchmark::State& state, const char* net) {
623*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, 4, 8, 1, 1,
624*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
625*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neon_lane_ld128(benchmark::State & state,const char * net)626*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neon_lane_ld128(benchmark::State& state, const char* net) {
627*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, 4, 8, 1, 1,
628*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
629*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neon_lane_ld64(benchmark::State & state,const char * net)630*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neon_lane_ld64(benchmark::State& state, const char* net) {
631*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, 6, 8, 1, 1,
632*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
633*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neon_lane_ld128(benchmark::State & state,const char * net)634*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neon_lane_ld128(benchmark::State& state, const char* net) {
635*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, 6, 8, 1, 1,
636*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
637*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8__neon_dup_ld64(benchmark::State & state,const char * net)638*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__neon_dup_ld64(benchmark::State& state, const char* net) {
639*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, 1, 8, 1, 1,
640*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
641*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neon_dup_ld128(benchmark::State & state,const char * net)642*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neon_dup_ld128(benchmark::State& state, const char* net) {
643*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, 4, 8, 1, 1,
644*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
645*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neon_dup_ld64(benchmark::State & state,const char * net)646*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neon_dup_ld64(benchmark::State& state, const char* net) {
647*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, 4, 8, 1, 1,
648*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
649*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neon_dup_ld64(benchmark::State & state,const char * net)650*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neon_dup_ld64(benchmark::State& state, const char* net) {
651*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, 6, 8, 1, 1,
652*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
653*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neon_dup_ld128(benchmark::State & state,const char * net)654*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neon_dup_ld128(benchmark::State& state, const char* net) {
655*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, 6, 8, 1, 1,
656*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
657*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8__neonfma_dup_ld64(benchmark::State & state,const char * net)658*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
659*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, 1, 8, 1, 1,
660*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
661*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neonfma_dup_ld128(benchmark::State & state,const char * net)662*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neonfma_dup_ld128(benchmark::State& state, const char* net) {
663*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, 4, 8, 1, 1,
664*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
665*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__neonfma_dup_ld64(benchmark::State & state,const char * net)666*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
667*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, 4, 8, 1, 1,
668*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
669*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neonfma_dup_ld64(benchmark::State & state,const char * net)670*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
671*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, 6, 8, 1, 1,
672*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
673*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__neonfma_dup_ld128(benchmark::State & state,const char * net)674*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__neonfma_dup_ld128(benchmark::State& state, const char* net) {
675*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, 6, 8, 1, 1,
676*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
677*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8s4__neon(benchmark::State & state,const char * net)678*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8s4__neon(benchmark::State& state, const char* net) {
679*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8s4__neon, 1, 8, 1, 4,
680*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
681*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8s4__neon(benchmark::State & state,const char * net)682*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8s4__neon(benchmark::State& state, const char* net) {
683*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8s4__neon, 4, 8, 1, 4,
684*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
685*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8s4__neon(benchmark::State & state,const char * net)686*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8s4__neon(benchmark::State& state, const char* net) {
687*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8s4__neon, 6, 8, 1, 4,
688*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
689*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_8x8s4__neon(benchmark::State & state,const char * net)690*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_8x8s4__neon(benchmark::State& state, const char* net) {
691*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_8x8s4__neon, 8, 8, 1, 4,
692*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
693*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_1x8s4__neonfma(benchmark::State & state,const char * net)694*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8s4__neonfma(benchmark::State& state, const char* net) {
695*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, 1, 8, 1, 4,
696*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
697*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8s4__neonfma(benchmark::State & state,const char * net)698*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8s4__neonfma(benchmark::State& state, const char* net) {
699*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, 4, 8, 1, 4,
700*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
701*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8s4__neonfma(benchmark::State & state,const char * net)702*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8s4__neonfma(benchmark::State& state, const char* net) {
703*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, 6, 8, 1, 4,
704*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
705*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_8x8s4__neonfma(benchmark::State & state,const char * net)706*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_8x8s4__neonfma(benchmark::State& state, const char* net) {
707*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, 8, 8, 1, 4,
708*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
709*4bdc9457SAndroid Build Coastguard Worker }
710*4bdc9457SAndroid Build Coastguard Worker
711*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__neon_lane_ld64)
BENCHMARK_CONV(f32_igemm_4x2__neon_lane_ld64)712*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x2__neon_lane_ld64)
713*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x2__neon_lane_ld64)
714*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x4__neon_lane_ld64)
715*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neon_lane_ld128)
716*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neon_lane_ld64)
717*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neon_lane_ld64)
718*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neon_lane_ld128)
719*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__neon_dup_ld64)
720*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neon_dup_ld128)
721*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neon_dup_ld64)
722*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neon_dup_ld64)
723*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neon_dup_ld128)
724*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__neonfma_dup_ld64)
725*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neonfma_dup_ld128)
726*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__neonfma_dup_ld64)
727*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neonfma_dup_ld64)
728*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__neonfma_dup_ld128)
729*4bdc9457SAndroid Build Coastguard Worker
730*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8s4__neon)
731*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8s4__neon)
732*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8s4__neon)
733*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_8x8s4__neon)
734*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8s4__neonfma)
735*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8s4__neonfma)
736*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8s4__neonfma)
737*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_8x8s4__neonfma)
738*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
739*4bdc9457SAndroid Build Coastguard Worker
740*4bdc9457SAndroid Build Coastguard Worker
741*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
742*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__sse_load1(benchmark::State& state, const char* net) {
743*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, 1, 8, 1, 1,
744*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
745*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_3x8__sse_load1(benchmark::State & state,const char * net)746*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8__sse_load1(benchmark::State& state, const char* net) {
747*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, 3, 8, 1, 1,
748*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
749*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__sse_load1(benchmark::State & state,const char * net)750*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__sse_load1(benchmark::State& state, const char* net) {
751*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, 4, 8, 1, 1,
752*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
753*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8__sse_load1(benchmark::State & state,const char * net)754*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__sse_load1(benchmark::State& state, const char* net) {
755*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, 5, 8, 1, 1,
756*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
757*4bdc9457SAndroid Build Coastguard Worker }
758*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_1x8__sse_dup(benchmark::State & state,const char * net)759*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__sse_dup(benchmark::State& state, const char* net) {
760*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, 1, 8, 1, 1,
761*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
762*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_3x8__sse_dup(benchmark::State & state,const char * net)763*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8__sse_dup(benchmark::State& state, const char* net) {
764*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, 3, 8, 1, 1,
765*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
766*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__sse_dup(benchmark::State & state,const char * net)767*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__sse_dup(benchmark::State& state, const char* net) {
768*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, 4, 8, 1, 1,
769*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
770*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8__sse_dup(benchmark::State & state,const char * net)771*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__sse_dup(benchmark::State& state, const char* net) {
772*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, 5, 8, 1, 1,
773*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
774*4bdc9457SAndroid Build Coastguard Worker }
775*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_1x8s4__sse(benchmark::State & state,const char * net)776*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8s4__sse(benchmark::State& state, const char* net) {
777*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8s4__sse, 1, 8, 1, 4,
778*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
779*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_3x8s4__sse(benchmark::State & state,const char * net)780*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8s4__sse(benchmark::State& state, const char* net) {
781*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8s4__sse, 3, 8, 1, 4,
782*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
783*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8s4__sse(benchmark::State & state,const char * net)784*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8s4__sse(benchmark::State& state, const char* net) {
785*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8s4__sse, 4, 8, 1, 4,
786*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
787*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8s4__sse(benchmark::State & state,const char * net)788*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8s4__sse(benchmark::State& state, const char* net) {
789*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8s4__sse, 5, 8, 1, 4,
790*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
791*4bdc9457SAndroid Build Coastguard Worker }
792*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_1x8__sse2_dup(benchmark::State & state,const char * net)793*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__sse2_dup(benchmark::State& state, const char* net) {
794*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, 1, 8, 1, 1,
795*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
796*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_3x8__sse2_dup(benchmark::State & state,const char * net)797*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8__sse2_dup(benchmark::State& state, const char* net) {
798*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, 3, 8, 1, 1,
799*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
800*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__sse2_dup(benchmark::State & state,const char * net)801*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__sse2_dup(benchmark::State& state, const char* net) {
802*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, 4, 8, 1, 1,
803*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
804*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8__sse2_dup(benchmark::State & state,const char * net)805*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__sse2_dup(benchmark::State& state, const char* net) {
806*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, 5, 8, 1, 1,
807*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_sse_params);
808*4bdc9457SAndroid Build Coastguard Worker }
809*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_1x8__avx_broadcast(benchmark::State & state,const char * net)810*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__avx_broadcast(benchmark::State& state, const char* net) {
811*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, 1, 8, 1, 1,
812*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
813*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__avx_broadcast(benchmark::State & state,const char * net)814*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__avx_broadcast(benchmark::State& state, const char* net) {
815*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, 4, 8, 1, 1,
816*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
817*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8__avx_broadcast(benchmark::State & state,const char * net)818*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__avx_broadcast(benchmark::State& state, const char* net) {
819*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, 5, 8, 1, 1,
820*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
821*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__avx_broadcast(benchmark::State & state,const char * net)822*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__avx_broadcast(benchmark::State& state, const char* net) {
823*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, 6, 8, 1, 1,
824*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
825*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_7x8__avx_broadcast(benchmark::State & state,const char * net)826*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_7x8__avx_broadcast(benchmark::State& state, const char* net) {
827*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, 7, 8, 1, 1,
828*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
829*4bdc9457SAndroid Build Coastguard Worker }
830*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_1x8__fma3_broadcast(benchmark::State & state,const char * net)831*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x8__fma3_broadcast(benchmark::State& state, const char* net) {
832*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, 1, 8, 1, 1,
833*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
834*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x8__fma3_broadcast(benchmark::State & state,const char * net)835*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__fma3_broadcast(benchmark::State& state, const char* net) {
836*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, 4, 8, 1, 1,
837*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
838*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x8__fma3_broadcast(benchmark::State & state,const char * net)839*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__fma3_broadcast(benchmark::State& state, const char* net) {
840*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, 5, 8, 1, 1,
841*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
842*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x8__fma3_broadcast(benchmark::State & state,const char * net)843*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__fma3_broadcast(benchmark::State& state, const char* net) {
844*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, 6, 8, 1, 1,
845*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
846*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_7x8__fma3_broadcast(benchmark::State & state,const char * net)847*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_7x8__fma3_broadcast(benchmark::State& state, const char* net) {
848*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, 7, 8, 1, 1,
849*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
850*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_8x8__fma3_broadcast(benchmark::State & state,const char * net)851*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_8x8__fma3_broadcast(benchmark::State& state, const char* net) {
852*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, 8, 8, 1, 1,
853*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
854*4bdc9457SAndroid Build Coastguard Worker }
855*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_1x16__avx512f_broadcast(benchmark::State & state,const char * net)856*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x16__avx512f_broadcast(benchmark::State& state, const char* net) {
857*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, 1, 16, 1, 1,
858*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
859*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_4x16__avx512f_broadcast(benchmark::State & state,const char * net)860*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x16__avx512f_broadcast(benchmark::State& state, const char* net) {
861*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, 4, 16, 1, 1,
862*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
863*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_5x16__avx512f_broadcast(benchmark::State & state,const char * net)864*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x16__avx512f_broadcast(benchmark::State& state, const char* net) {
865*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, 5, 16, 1, 1,
866*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
867*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_6x16__avx512f_broadcast(benchmark::State & state,const char * net)868*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x16__avx512f_broadcast(benchmark::State& state, const char* net) {
869*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, 6, 16, 1, 1,
870*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
871*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_7x16__avx512f_broadcast(benchmark::State & state,const char * net)872*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_7x16__avx512f_broadcast(benchmark::State& state, const char* net) {
873*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, 7, 16, 1, 1,
874*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
875*4bdc9457SAndroid Build Coastguard Worker }
f32_igemm_8x16__avx512f_broadcast(benchmark::State & state,const char * net)876*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_8x16__avx512f_broadcast(benchmark::State& state, const char* net) {
877*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, 8, 16, 1, 1,
878*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
879*4bdc9457SAndroid Build Coastguard Worker }
880*4bdc9457SAndroid Build Coastguard Worker
881*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__sse_load1)
BENCHMARK_CONV(f32_igemm_3x8__sse_load1)882*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8__sse_load1)
883*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__sse_load1)
884*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__sse_load1)
885*4bdc9457SAndroid Build Coastguard Worker
886*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__sse_dup)
887*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8__sse_dup)
888*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__sse_dup)
889*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__sse_dup)
890*4bdc9457SAndroid Build Coastguard Worker
891*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8s4__sse)
892*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8s4__sse)
893*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8s4__sse)
894*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8s4__sse)
895*4bdc9457SAndroid Build Coastguard Worker
896*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__sse2_dup)
897*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8__sse2_dup)
898*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__sse2_dup)
899*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__sse2_dup)
900*4bdc9457SAndroid Build Coastguard Worker
901*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__avx_broadcast)
902*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__avx_broadcast)
903*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__avx_broadcast)
904*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__avx_broadcast)
905*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_7x8__avx_broadcast)
906*4bdc9457SAndroid Build Coastguard Worker
907*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x8__fma3_broadcast)
908*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__fma3_broadcast)
909*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__fma3_broadcast)
910*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__fma3_broadcast)
911*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_7x8__fma3_broadcast)
912*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_8x8__fma3_broadcast)
913*4bdc9457SAndroid Build Coastguard Worker
914*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x16__avx512f_broadcast)
915*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x16__avx512f_broadcast)
916*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x16__avx512f_broadcast)
917*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x16__avx512f_broadcast)
918*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_7x16__avx512f_broadcast)
919*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_8x16__avx512f_broadcast)
920*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
921*4bdc9457SAndroid Build Coastguard Worker
922*4bdc9457SAndroid Build Coastguard Worker
923*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
924*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
925*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, 3, 8, 1, 1,
926*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
927*4bdc9457SAndroid Build Coastguard Worker }
928*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_4x8__wasmsimd_arm_loadsplat(benchmark::State & state,const char * net)929*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
930*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, 4, 8, 1, 1,
931*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
932*4bdc9457SAndroid Build Coastguard Worker }
933*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_5x8__wasmsimd_arm_loadsplat(benchmark::State & state,const char * net)934*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
935*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, 5, 8, 1, 1,
936*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
937*4bdc9457SAndroid Build Coastguard Worker }
938*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_6x8__wasmsimd_arm_loadsplat(benchmark::State & state,const char * net)939*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
940*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, 6, 8, 1, 1,
941*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
942*4bdc9457SAndroid Build Coastguard Worker }
943*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_3x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)944*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
945*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, 3, 8, 1, 1,
946*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
947*4bdc9457SAndroid Build Coastguard Worker }
948*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_4x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)949*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
950*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, 4, 8, 1, 1,
951*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
952*4bdc9457SAndroid Build Coastguard Worker }
953*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_5x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)954*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
955*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, 5, 8, 1, 1,
956*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
957*4bdc9457SAndroid Build Coastguard Worker }
958*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_6x8__wasmsimd_x86_loadsplat(benchmark::State & state,const char * net)959*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
960*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, 6, 8, 1, 1,
961*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
962*4bdc9457SAndroid Build Coastguard Worker }
963*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_3x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)964*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
965*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, 3, 8, 1, 1,
966*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
967*4bdc9457SAndroid Build Coastguard Worker }
968*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_4x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)969*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
970*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, 4, 8, 1, 1,
971*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
972*4bdc9457SAndroid Build Coastguard Worker }
973*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_5x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)974*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
975*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, 5, 8, 1, 1,
976*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
977*4bdc9457SAndroid Build Coastguard Worker }
978*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_6x8__wasmsimd_arm_splat(benchmark::State & state,const char * net)979*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
980*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, 6, 8, 1, 1,
981*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
982*4bdc9457SAndroid Build Coastguard Worker }
983*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_3x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)984*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
985*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, 3, 8, 1, 1,
986*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
987*4bdc9457SAndroid Build Coastguard Worker }
988*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_4x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)989*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
990*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, 4, 8, 1, 1,
991*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
992*4bdc9457SAndroid Build Coastguard Worker }
993*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_5x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)994*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
995*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, 5, 8, 1, 1,
996*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
997*4bdc9457SAndroid Build Coastguard Worker }
998*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_6x8__wasmsimd_x86_splat(benchmark::State & state,const char * net)999*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
1000*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, 6, 8, 1, 1,
1001*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1002*4bdc9457SAndroid Build Coastguard Worker }
1003*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_3x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1004*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1005*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, 3, 8, 1, 4,
1006*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1007*4bdc9457SAndroid Build Coastguard Worker }
1008*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_4x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1009*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1010*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, 4, 8, 1, 4,
1011*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1012*4bdc9457SAndroid Build Coastguard Worker }
1013*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_5x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1014*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1015*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, 5, 8, 1, 4,
1016*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1017*4bdc9457SAndroid Build Coastguard Worker }
1018*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_6x8s4__wasmsimd_arm(benchmark::State & state,const char * net)1019*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
1020*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, 6, 8, 1, 4,
1021*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1022*4bdc9457SAndroid Build Coastguard Worker }
1023*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_3x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1024*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_3x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1025*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, 3, 8, 1, 4,
1026*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1027*4bdc9457SAndroid Build Coastguard Worker }
1028*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_4x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1029*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1030*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, 4, 8, 1, 4,
1031*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1032*4bdc9457SAndroid Build Coastguard Worker }
1033*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_5x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1034*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_5x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1035*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, 5, 8, 1, 4,
1036*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1037*4bdc9457SAndroid Build Coastguard Worker }
1038*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_6x8s4__wasmsimd_x86(benchmark::State & state,const char * net)1039*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_6x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
1040*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, 6, 8, 1, 4,
1041*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1042*4bdc9457SAndroid Build Coastguard Worker }
1043*4bdc9457SAndroid Build Coastguard Worker
1044*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_arm_loadsplat)
BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_arm_loadsplat)1045*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_arm_loadsplat)
1046*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_arm_loadsplat)
1047*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_arm_loadsplat)
1048*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_x86_loadsplat)
1049*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_x86_loadsplat)
1050*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_x86_loadsplat)
1051*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_x86_loadsplat)
1052*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_arm_splat)
1053*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_arm_splat)
1054*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_arm_splat)
1055*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_arm_splat)
1056*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_x86_splat)
1057*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_x86_splat)
1058*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_x86_splat)
1059*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_x86_splat)
1060*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8s4__wasmsimd_arm)
1061*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8s4__wasmsimd_arm)
1062*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8s4__wasmsimd_arm)
1063*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8s4__wasmsimd_arm)
1064*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_3x8s4__wasmsimd_x86)
1065*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x8s4__wasmsimd_x86)
1066*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_5x8s4__wasmsimd_x86)
1067*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_6x8s4__wasmsimd_x86)
1068*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1069*4bdc9457SAndroid Build Coastguard Worker
1070*4bdc9457SAndroid Build Coastguard Worker
1071*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_1x4__scalar(benchmark::State& state, const char* net) {
1072*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_1x4__scalar, 1, 4, 1, 1,
1073*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1074*4bdc9457SAndroid Build Coastguard Worker }
1075*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_2x4__scalar(benchmark::State & state,const char * net)1076*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_2x4__scalar(benchmark::State& state, const char* net) {
1077*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_2x4__scalar, 2, 4, 1, 1,
1078*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1079*4bdc9457SAndroid Build Coastguard Worker }
1080*4bdc9457SAndroid Build Coastguard Worker
f32_igemm_4x4__scalar(benchmark::State & state,const char * net)1081*4bdc9457SAndroid Build Coastguard Worker static void f32_igemm_4x4__scalar(benchmark::State& state, const char* net) {
1082*4bdc9457SAndroid Build Coastguard Worker f32_igemm(state, xnn_f32_igemm_minmax_ukernel_4x4__scalar, 4, 4, 1, 1,
1083*4bdc9457SAndroid Build Coastguard Worker xnn_init_f32_minmax_scalar_params);
1084*4bdc9457SAndroid Build Coastguard Worker }
1085*4bdc9457SAndroid Build Coastguard Worker
1086*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_1x4__scalar)
1087*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_2x4__scalar)
1088*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_CONV(f32_igemm_4x4__scalar)
1089*4bdc9457SAndroid Build Coastguard Worker
1090*4bdc9457SAndroid Build Coastguard Worker #ifndef XNNPACK_BENCHMARK_NO_MAIN
1091*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_MAIN();
1092*4bdc9457SAndroid Build Coastguard Worker #endif
1093