// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/requantization.h>


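// Tester for element-wise multiply-by-constant (VMulC) micro-kernels with
// min/max output clamping, in the QU8 and QS8 quantized data types. The
// setters configure quantization parameters and return *this, so a test can
// chain them builder-style before calling Test().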
class VMulCMicrokernelTester {
 public:
  inline VMulCMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VMulCMicrokernelTester& inplace(bool inplace) {
    this->inplace_ = inplace;
    return *this;
  }

  inline bool inplace() const {
    return this->inplace_;
  }

  inline VMulCMicrokernelTester& a_scale(float a_scale) {
    assert(a_scale > 0.0f);
    assert(std::isnormal(a_scale));
    this->a_scale_ = a_scale;
    return *this;
  }

  inline float a_scale() const {
    return this->a_scale_;
  }

  inline VMulCMicrokernelTester& a_zero_point(uint8_t a_zero_point) {
    this->a_zero_point_ = a_zero_point;
    return *this;
  }

  inline uint8_t a_zero_point() const {
    return this->a_zero_point_;
  }

  inline VMulCMicrokernelTester& b_scale(float b_scale) {
    assert(b_scale > 0.0f);
    assert(std::isnormal(b_scale));
    this->b_scale_ = b_scale;
    return *this;
  }

  inline float b_scale() const {
    return this->b_scale_;
  }

  inline VMulCMicrokernelTester& b_zero_point(uint8_t b_zero_point) {
    this->b_zero_point_ = b_zero_point;
    return *this;
  }

  inline uint8_t b_zero_point() const {
    return this->b_zero_point_;
  }

  inline VMulCMicrokernelTester& y_scale(float y_scale) {
    assert(y_scale > 0.0f);
    assert(std::isnormal(y_scale));
    this->y_scale_ = y_scale;
    return *this;
  }

  inline float y_scale() const {
    return this->y_scale_;
  }

  inline VMulCMicrokernelTester& y_zero_point(uint8_t y_zero_point) {
    this->y_zero_point_ = y_zero_point;
    return *this;
  }

  inline uint8_t y_zero_point() const {
    return this->y_zero_point_;
  }

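  // qmin/qmax define the inclusive output clamping range, expressed in the
  // unsigned (QU8) representation; the QS8 test shifts them by 0x80 into the
  // signed domain.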
  inline VMulCMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VMulCMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VMulCMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

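  // Tests a QU8 (unsigned 8-bit) vmulc micro-kernel on random inputs.
  // Reference: y = clamp(zp_y + (a - zp_a) * (b - zp_b) * scale, qmin, qmax),
  // where scale = a_scale * b_scale / y_scale. The kernel output must match
  // the reference requantization exactly and stay within 0.6 of the
  // floating-point estimate. The input buffer is padded with XNN_EXTRA_BYTES
  // because micro-kernels may read (but not write) past the end of the batch.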
  void Test(
      xnn_qu8_vmul_minmax_ukernel_function vmul_minmax,
      xnn_init_qu8_mul_minmax_params_fn init_params,
      xnn_qu8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<float> y_fp(batch_size());
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(u8rng));
      const uint8_t b = u8rng();
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(u8rng));
      } else {
        std::fill(y.begin(), y.end(), 0xA5);
      }
      const uint8_t* a_data = inplace() ? y.data() : a.data();

      // Prepare parameters.
      const float product_scale = a_scale() * b_scale();
      const float product_output_scale = product_scale / y_scale();
      xnn_qu8_mul_minmax_params quantization_params;
      init_params(
        &quantization_params,
        a_zero_point(), b_zero_point(), y_zero_point(),
        product_output_scale, qmin(), qmax());

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const int32_t acc =
          (int32_t(a_data[i]) - int32_t(a_zero_point())) * (int32_t(b) - int32_t(b_zero_point()));
        y_fp[i] = float(y_zero_point()) + product_output_scale * float(acc);
        y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax())));
        y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin())));
        y_ref[i] = requantize(
          acc, product_output_scale, y_zero_point(), qmin(), qmax());
      }

      // Call optimized micro-kernel.
      vmul_minmax(batch_size(), a_data, &b, y.data(), &quantization_params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(uint32_t(y[i]), uint32_t(qmax()))
          << "at element " << i << " / " << batch_size();
        ASSERT_GE(uint32_t(y[i]), uint32_t(qmin()))
          << "at element " << i << " / " << batch_size();
        ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
          << "at element " << i << " / " << batch_size();
        ASSERT_EQ(uint32_t(y[i]), uint32_t(y_ref[i]))
          << "at element " << i << " / " << batch_size();
      }
    }
  }

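  // Tests a QS8 (signed 8-bit) vmulc micro-kernel. Identical to the QU8 test
  // above, except that the unsigned zero points and clamping bounds held by
  // the tester are shifted by 0x80 into the signed [-128, 127] domain.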
  void Test(
      xnn_qs8_vmul_minmax_ukernel_function vmul_minmax,
      xnn_init_qs8_mul_minmax_params_fn init_params,
      xnn_qs8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      rng);

    std::vector<int8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
    std::vector<float> y_fp(batch_size());
    std::vector<int8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(i8rng));
      const int8_t b = i8rng();
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(i8rng));
      } else {
        std::fill(y.begin(), y.end(), 0xA5);
      }
      const int8_t* a_data = inplace() ? y.data() : a.data();

      // Prepare parameters.
      const float product_scale = a_scale() * b_scale();
      const float product_output_scale = product_scale / y_scale();
      EXPECT_GE(product_output_scale, 0x1.0p-32f);
      xnn_qs8_mul_minmax_params quantization_params;
      init_params(
        &quantization_params,
        int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80),
        product_output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const int32_t acc =
          (int32_t(a_data[i]) - int32_t(a_zero_point() - 0x80)) * (int32_t(b) - int32_t(b_zero_point() - 0x80));
        y_fp[i] = float(y_zero_point() - 0x80) + product_output_scale * float(acc);
        y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax() - 0x80)));
        y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin() - 0x80)));
        y_ref[i] = requantize(
          acc, product_output_scale, int8_t(y_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
      }

      // Call optimized micro-kernel.
      vmul_minmax(batch_size(), a_data, &b, y.data(), &quantization_params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(int32_t(y[i]), int32_t(qmax() - 0x80))
          << "at element " << i << " / " << batch_size();
        ASSERT_GE(int32_t(y[i]), int32_t(qmin() - 0x80))
          << "at element " << i << " / " << batch_size();
        ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
          << "at element " << i << " / " << batch_size();
        ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
          << "at element " << i << " / " << batch_size();
      }
    }
  }

 private:
  size_t batch_size_{1};
  bool inplace_{false};
  float a_scale_{0.75f};
  float b_scale_{1.25f};
  float y_scale_{0.96875f};
  uint8_t a_zero_point_{121};
  uint8_t b_zero_point_{127};
  uint8_t y_zero_point_{133};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
269