xref: /aosp_15_r20/external/XNNPACK/bench/qu8-requantization.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker 
9*4bdc9457SAndroid Build Coastguard Worker #include <algorithm>
10*4bdc9457SAndroid Build Coastguard Worker #include <cfloat>
11*4bdc9457SAndroid Build Coastguard Worker #include <cmath>
12*4bdc9457SAndroid Build Coastguard Worker #include <functional>
13*4bdc9457SAndroid Build Coastguard Worker #include <random>
14*4bdc9457SAndroid Build Coastguard Worker #include <vector>
15*4bdc9457SAndroid Build Coastguard Worker 
16*4bdc9457SAndroid Build Coastguard Worker #include <cpuinfo.h>
17*4bdc9457SAndroid Build Coastguard Worker 
18*4bdc9457SAndroid Build Coastguard Worker #include <benchmark/benchmark.h>
19*4bdc9457SAndroid Build Coastguard Worker #include "bench/utils.h"
20*4bdc9457SAndroid Build Coastguard Worker 
21*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/aligned-allocator.h>
22*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
23*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/requantization-stubs.h>
24*4bdc9457SAndroid Build Coastguard Worker 
25*4bdc9457SAndroid Build Coastguard Worker 
26*4bdc9457SAndroid Build Coastguard Worker class Requantization : public benchmark::Fixture {
27*4bdc9457SAndroid Build Coastguard Worker  public:
Requantization()28*4bdc9457SAndroid Build Coastguard Worker   inline Requantization()
29*4bdc9457SAndroid Build Coastguard Worker   {
30*4bdc9457SAndroid Build Coastguard Worker     cpuinfo_initialize();
31*4bdc9457SAndroid Build Coastguard Worker     const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
32*4bdc9457SAndroid Build Coastguard Worker     const size_t l1d_reserve = 1024;
33*4bdc9457SAndroid Build Coastguard Worker     n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(uint8_t));
34*4bdc9457SAndroid Build Coastguard Worker     n_ = n_ / 16 * 16;
35*4bdc9457SAndroid Build Coastguard Worker   }
36*4bdc9457SAndroid Build Coastguard Worker 
SetUp(benchmark::State & state)37*4bdc9457SAndroid Build Coastguard Worker   virtual void SetUp(benchmark::State& state) override
38*4bdc9457SAndroid Build Coastguard Worker   {
39*4bdc9457SAndroid Build Coastguard Worker     std::random_device random_device;
40*4bdc9457SAndroid Build Coastguard Worker     auto rng = std::mt19937(random_device());
41*4bdc9457SAndroid Build Coastguard Worker     auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(), std::ref(rng));
42*4bdc9457SAndroid Build Coastguard Worker 
43*4bdc9457SAndroid Build Coastguard Worker     input_.resize(n());
44*4bdc9457SAndroid Build Coastguard Worker     std::generate(input_.begin(), input_.end(), std::ref(i32rng));
45*4bdc9457SAndroid Build Coastguard Worker     output_.resize(n());
46*4bdc9457SAndroid Build Coastguard Worker     std::fill(output_.begin(), output_.end(), 0xA5);
47*4bdc9457SAndroid Build Coastguard Worker 
48*4bdc9457SAndroid Build Coastguard Worker     const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
49*4bdc9457SAndroid Build Coastguard Worker     if (cpu_frequency != 0) {
50*4bdc9457SAndroid Build Coastguard Worker       state.counters["cpufreq"] = cpu_frequency;
51*4bdc9457SAndroid Build Coastguard Worker     }
52*4bdc9457SAndroid Build Coastguard Worker   }
53*4bdc9457SAndroid Build Coastguard Worker 
TearDown(benchmark::State & state)54*4bdc9457SAndroid Build Coastguard Worker   virtual void TearDown(benchmark::State& state) override
55*4bdc9457SAndroid Build Coastguard Worker   {
56*4bdc9457SAndroid Build Coastguard Worker     state.SetItemsProcessed(uint64_t(state.iterations()) * n());
57*4bdc9457SAndroid Build Coastguard Worker     state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(uint8_t)));
58*4bdc9457SAndroid Build Coastguard Worker     input_.clear();
59*4bdc9457SAndroid Build Coastguard Worker     output_.clear();
60*4bdc9457SAndroid Build Coastguard Worker   }
61*4bdc9457SAndroid Build Coastguard Worker 
input() const62*4bdc9457SAndroid Build Coastguard Worker   inline const int32_t* input() const
63*4bdc9457SAndroid Build Coastguard Worker   {
64*4bdc9457SAndroid Build Coastguard Worker     return input_.data();
65*4bdc9457SAndroid Build Coastguard Worker   }
66*4bdc9457SAndroid Build Coastguard Worker 
output()67*4bdc9457SAndroid Build Coastguard Worker   inline uint8_t* output()
68*4bdc9457SAndroid Build Coastguard Worker   {
69*4bdc9457SAndroid Build Coastguard Worker     return output_.data();
70*4bdc9457SAndroid Build Coastguard Worker   }
71*4bdc9457SAndroid Build Coastguard Worker 
n() const72*4bdc9457SAndroid Build Coastguard Worker   inline size_t n() const
73*4bdc9457SAndroid Build Coastguard Worker   {
74*4bdc9457SAndroid Build Coastguard Worker     return n_;
75*4bdc9457SAndroid Build Coastguard Worker   }
76*4bdc9457SAndroid Build Coastguard Worker 
77*4bdc9457SAndroid Build Coastguard Worker  protected:
78*4bdc9457SAndroid Build Coastguard Worker   std::vector<int32_t, AlignedAllocator<int32_t, 64>> input_;
79*4bdc9457SAndroid Build Coastguard Worker   std::vector<uint8_t> output_;
80*4bdc9457SAndroid Build Coastguard Worker   size_t n_;
81*4bdc9457SAndroid Build Coastguard Worker };
82*4bdc9457SAndroid Build Coastguard Worker 
83*4bdc9457SAndroid Build Coastguard Worker 
84*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Benchmarks xnn_qu8_requantize_fp32__neon over the fixture's n() elements.
BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_fp32__neon(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
91*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_gemmlowp__neon over the fixture's n() elements.
BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__neon(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
98*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_rndna__neon over the fixture's n() elements.
BENCHMARK_F(Requantization, rndna__neon)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__neon(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
105*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
106*4bdc9457SAndroid Build Coastguard Worker 
107*4bdc9457SAndroid Build Coastguard Worker 
108*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Benchmarks xnn_qu8_requantize_fp32__sse2 over the fixture's n() elements.
BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_fp32__sse2(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
115*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_gemmlowp__sse2 over the fixture's n() elements.
BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__sse2(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
122*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_gemmlowp__ssse3 over the fixture's n() elements.
BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__ssse3(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
129*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_gemmlowp__sse4 over the fixture's n() elements.
BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__sse4(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
136*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_rndna__sse2 over the fixture's n() elements.
BENCHMARK_F(Requantization, rndna__sse2)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__sse2(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
143*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_rndna__ssse3 over the fixture's n() elements.
BENCHMARK_F(Requantization, rndna__ssse3)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__ssse3(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
150*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_rndna__sse4 over the fixture's n() elements.
BENCHMARK_F(Requantization, rndna__sse4)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__sse4(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
157*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
158*4bdc9457SAndroid Build Coastguard Worker 
159*4bdc9457SAndroid Build Coastguard Worker 
160*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Benchmarks xnn_qu8_requantize_fp32__wasmsimd over the fixture's n() elements.
BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_fp32__wasmsimd(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
167*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_gemmlowp__wasmsimd over the fixture's n() elements.
BENCHMARK_F(Requantization, gemmlowp__wasmsimd)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__wasmsimd(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
174*4bdc9457SAndroid Build Coastguard Worker #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
175*4bdc9457SAndroid Build Coastguard Worker 
176*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_fp32__scalar_lrintf over the fixture's n() elements.
BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_fp32__scalar_lrintf(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
183*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_fp32__scalar_fmagic over the fixture's n() elements.
BENCHMARK_F(Requantization, fp32__scalar_fmagic)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_fp32__scalar_fmagic(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
190*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_gemmlowp__scalar over the fixture's n() elements.
BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__scalar(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
197*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_rndna__scalar_signed64 over the fixture's n() elements.
BENCHMARK_F(Requantization, rndna__scalar_signed64)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_signed64(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
204*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_rndna__scalar_unsigned32 over the fixture's n() elements.
BENCHMARK_F(Requantization, rndna__scalar_unsigned32)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_unsigned32(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
211*4bdc9457SAndroid Build Coastguard Worker 
// Benchmarks xnn_qu8_requantize_rndna__scalar_unsigned64 over the fixture's n() elements.
BENCHMARK_F(Requantization, rndna__scalar_unsigned64)(benchmark::State& state) {
  // Fixed requantization parameters handed to the kernel on every iteration.
  constexpr float kScale = 0x1.0p-12f;
  constexpr int kZeroPoint = 128;
  constexpr int kQMin = 1;
  constexpr int kQMax = 254;

  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_unsigned64(n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
  }
}
218*4bdc9457SAndroid Build Coastguard Worker 
219*4bdc9457SAndroid Build Coastguard Worker 
220*4bdc9457SAndroid Build Coastguard Worker #ifndef XNNPACK_BENCHMARK_NO_MAIN
221*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_MAIN();
222*4bdc9457SAndroid Build Coastguard Worker #endif
223