// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker
9*4bdc9457SAndroid Build Coastguard Worker #include <algorithm>
10*4bdc9457SAndroid Build Coastguard Worker #include <cfloat>
11*4bdc9457SAndroid Build Coastguard Worker #include <cmath>
12*4bdc9457SAndroid Build Coastguard Worker #include <functional>
13*4bdc9457SAndroid Build Coastguard Worker #include <random>
14*4bdc9457SAndroid Build Coastguard Worker #include <vector>
15*4bdc9457SAndroid Build Coastguard Worker
16*4bdc9457SAndroid Build Coastguard Worker #include <cpuinfo.h>
17*4bdc9457SAndroid Build Coastguard Worker
18*4bdc9457SAndroid Build Coastguard Worker #include <benchmark/benchmark.h>
19*4bdc9457SAndroid Build Coastguard Worker #include "bench/utils.h"
20*4bdc9457SAndroid Build Coastguard Worker
21*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/aligned-allocator.h>
22*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
23*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/requantization-stubs.h>
24*4bdc9457SAndroid Build Coastguard Worker
25*4bdc9457SAndroid Build Coastguard Worker
26*4bdc9457SAndroid Build Coastguard Worker class Requantization : public benchmark::Fixture {
27*4bdc9457SAndroid Build Coastguard Worker public:
Requantization()28*4bdc9457SAndroid Build Coastguard Worker inline Requantization()
29*4bdc9457SAndroid Build Coastguard Worker {
30*4bdc9457SAndroid Build Coastguard Worker cpuinfo_initialize();
31*4bdc9457SAndroid Build Coastguard Worker const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
32*4bdc9457SAndroid Build Coastguard Worker const size_t l1d_reserve = 1024;
33*4bdc9457SAndroid Build Coastguard Worker n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(uint8_t));
34*4bdc9457SAndroid Build Coastguard Worker n_ = n_ / 16 * 16;
35*4bdc9457SAndroid Build Coastguard Worker }
36*4bdc9457SAndroid Build Coastguard Worker
SetUp(benchmark::State & state)37*4bdc9457SAndroid Build Coastguard Worker virtual void SetUp(benchmark::State& state) override
38*4bdc9457SAndroid Build Coastguard Worker {
39*4bdc9457SAndroid Build Coastguard Worker std::random_device random_device;
40*4bdc9457SAndroid Build Coastguard Worker auto rng = std::mt19937(random_device());
41*4bdc9457SAndroid Build Coastguard Worker auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(), std::ref(rng));
42*4bdc9457SAndroid Build Coastguard Worker
43*4bdc9457SAndroid Build Coastguard Worker input_.resize(n());
44*4bdc9457SAndroid Build Coastguard Worker std::generate(input_.begin(), input_.end(), std::ref(i32rng));
45*4bdc9457SAndroid Build Coastguard Worker output_.resize(n());
46*4bdc9457SAndroid Build Coastguard Worker std::fill(output_.begin(), output_.end(), 0xA5);
47*4bdc9457SAndroid Build Coastguard Worker
48*4bdc9457SAndroid Build Coastguard Worker const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
49*4bdc9457SAndroid Build Coastguard Worker if (cpu_frequency != 0) {
50*4bdc9457SAndroid Build Coastguard Worker state.counters["cpufreq"] = cpu_frequency;
51*4bdc9457SAndroid Build Coastguard Worker }
52*4bdc9457SAndroid Build Coastguard Worker }
53*4bdc9457SAndroid Build Coastguard Worker
TearDown(benchmark::State & state)54*4bdc9457SAndroid Build Coastguard Worker virtual void TearDown(benchmark::State& state) override
55*4bdc9457SAndroid Build Coastguard Worker {
56*4bdc9457SAndroid Build Coastguard Worker state.SetItemsProcessed(uint64_t(state.iterations()) * n());
57*4bdc9457SAndroid Build Coastguard Worker state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(uint8_t)));
58*4bdc9457SAndroid Build Coastguard Worker input_.clear();
59*4bdc9457SAndroid Build Coastguard Worker output_.clear();
60*4bdc9457SAndroid Build Coastguard Worker }
61*4bdc9457SAndroid Build Coastguard Worker
input() const62*4bdc9457SAndroid Build Coastguard Worker inline const int32_t* input() const
63*4bdc9457SAndroid Build Coastguard Worker {
64*4bdc9457SAndroid Build Coastguard Worker return input_.data();
65*4bdc9457SAndroid Build Coastguard Worker }
66*4bdc9457SAndroid Build Coastguard Worker
output()67*4bdc9457SAndroid Build Coastguard Worker inline uint8_t* output()
68*4bdc9457SAndroid Build Coastguard Worker {
69*4bdc9457SAndroid Build Coastguard Worker return output_.data();
70*4bdc9457SAndroid Build Coastguard Worker }
71*4bdc9457SAndroid Build Coastguard Worker
n() const72*4bdc9457SAndroid Build Coastguard Worker inline size_t n() const
73*4bdc9457SAndroid Build Coastguard Worker {
74*4bdc9457SAndroid Build Coastguard Worker return n_;
75*4bdc9457SAndroid Build Coastguard Worker }
76*4bdc9457SAndroid Build Coastguard Worker
77*4bdc9457SAndroid Build Coastguard Worker protected:
78*4bdc9457SAndroid Build Coastguard Worker std::vector<int32_t, AlignedAllocator<int32_t, 64>> input_;
79*4bdc9457SAndroid Build Coastguard Worker std::vector<uint8_t> output_;
80*4bdc9457SAndroid Build Coastguard Worker size_t n_;
81*4bdc9457SAndroid Build Coastguard Worker };
82*4bdc9457SAndroid Build Coastguard Worker
83*4bdc9457SAndroid Build Coastguard Worker
84*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Times xnn_qu8_requantize_fp32__neon: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_fp32__neon(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
91*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_gemmlowp__neon: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__neon(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
98*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_rndna__neon: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, rndna__neon)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_rndna__neon(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
105*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
106*4bdc9457SAndroid Build Coastguard Worker
107*4bdc9457SAndroid Build Coastguard Worker
108*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Times xnn_qu8_requantize_fp32__sse2: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_fp32__sse2(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
115*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_gemmlowp__sse2: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__sse2(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
122*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_gemmlowp__ssse3: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__ssse3(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
129*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_gemmlowp__sse4: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__sse4(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
136*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_rndna__sse2: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, rndna__sse2)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_rndna__sse2(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
143*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_rndna__ssse3: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, rndna__ssse3)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_rndna__ssse3(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
150*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_rndna__sse4: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, rndna__sse4)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_rndna__sse4(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
157*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
158*4bdc9457SAndroid Build Coastguard Worker
159*4bdc9457SAndroid Build Coastguard Worker
160*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Times xnn_qu8_requantize_fp32__wasmsimd: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_fp32__wasmsimd(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
167*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_gemmlowp__wasmsimd: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, gemmlowp__wasmsimd)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__wasmsimd(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
174*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
175*4bdc9457SAndroid Build Coastguard Worker
176*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_fp32__scalar_lrintf: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_fp32__scalar_lrintf(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
183*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_fp32__scalar_fmagic: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, fp32__scalar_fmagic)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_fp32__scalar_fmagic(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
190*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_gemmlowp__scalar: every iteration requantizes the
// fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_gemmlowp__scalar(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
197*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_rndna__scalar_signed64: every iteration requantizes
// the fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, rndna__scalar_signed64)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_signed64(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
204*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_rndna__scalar_unsigned32: every iteration
// requantizes the fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, rndna__scalar_unsigned32)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_unsigned32(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
211*4bdc9457SAndroid Build Coastguard Worker
// Times xnn_qu8_requantize_rndna__scalar_unsigned64: every iteration
// requantizes the fixture's n() inputs into its output buffer.
BENCHMARK_F(Requantization, rndna__scalar_unsigned64)(benchmark::State& state) {
  constexpr float scale = 0x1.0p-12f;
  constexpr int zero_point = 128;
  constexpr int qmin = 1;
  constexpr int qmax = 254;
  for (auto _ : state) {
    xnn_qu8_requantize_rndna__scalar_unsigned64(n(), input(), scale, zero_point, qmin, qmax, output());
  }
}
218*4bdc9457SAndroid Build Coastguard Worker
219*4bdc9457SAndroid Build Coastguard Worker
220*4bdc9457SAndroid Build Coastguard Worker #ifndef XNNPACK_BENCHMARK_NO_MAIN
221*4bdc9457SAndroid Build Coastguard Worker BENCHMARK_MAIN();
222*4bdc9457SAndroid Build Coastguard Worker #endif
223