// xref: /aosp_15_r20/external/XNNPACK/test/vmul-microkernel-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/requantization.h>
24 
25 class VMulMicrokernelTester {
26  public:
batch_size(size_t batch_size)27   inline VMulMicrokernelTester& batch_size(size_t batch_size) {
28     assert(batch_size != 0);
29     this->batch_size_ = batch_size;
30     return *this;
31   }
32 
batch_size()33   inline size_t batch_size() const {
34     return this->batch_size_;
35   }
36 
inplace_a(bool inplace_a)37   inline VMulMicrokernelTester& inplace_a(bool inplace_a) {
38     this->inplace_a_ = inplace_a;
39     return *this;
40   }
41 
inplace_a()42   inline bool inplace_a() const {
43     return this->inplace_a_;
44   }
45 
inplace_b(bool inplace_b)46   inline VMulMicrokernelTester& inplace_b(bool inplace_b) {
47     this->inplace_b_ = inplace_b;
48     return *this;
49   }
50 
inplace_b()51   inline bool inplace_b() const {
52     return this->inplace_b_;
53   }
54 
a_scale(float a_scale)55   inline VMulMicrokernelTester& a_scale(float a_scale) {
56     assert(a_scale > 0.0f);
57     assert(std::isnormal(a_scale));
58     this->a_scale_ = a_scale;
59     return *this;
60   }
61 
a_scale()62   inline float a_scale() const {
63     return this->a_scale_;
64   }
65 
a_zero_point(uint8_t a_zero_point)66   inline VMulMicrokernelTester& a_zero_point(uint8_t a_zero_point) {
67     this->a_zero_point_ = a_zero_point;
68     return *this;
69   }
70 
a_zero_point()71   inline uint8_t a_zero_point() const {
72     return this->a_zero_point_;
73   }
74 
b_scale(float b_scale)75   inline VMulMicrokernelTester& b_scale(float b_scale) {
76     assert(b_scale > 0.0f);
77     assert(std::isnormal(b_scale));
78     this->b_scale_ = b_scale;
79     return *this;
80   }
81 
b_scale()82   inline float b_scale() const {
83     return this->b_scale_;
84   }
85 
b_zero_point(uint8_t b_zero_point)86   inline VMulMicrokernelTester& b_zero_point(uint8_t b_zero_point) {
87     this->b_zero_point_ = b_zero_point;
88     return *this;
89   }
90 
b_zero_point()91   inline uint8_t b_zero_point() const {
92     return this->b_zero_point_;
93   }
94 
y_scale(float y_scale)95   inline VMulMicrokernelTester& y_scale(float y_scale) {
96     assert(y_scale > 0.0f);
97     assert(std::isnormal(y_scale));
98     this->y_scale_ = y_scale;
99     return *this;
100   }
101 
y_scale()102   inline float y_scale() const {
103     return this->y_scale_;
104   }
105 
y_zero_point(uint8_t y_zero_point)106   inline VMulMicrokernelTester& y_zero_point(uint8_t y_zero_point) {
107     this->y_zero_point_ = y_zero_point;
108     return *this;
109   }
110 
y_zero_point()111   inline uint8_t y_zero_point() const {
112     return this->y_zero_point_;
113   }
114 
qmin(uint8_t qmin)115   inline VMulMicrokernelTester& qmin(uint8_t qmin) {
116     this->qmin_ = qmin;
117     return *this;
118   }
119 
qmin()120   inline uint8_t qmin() const {
121     return this->qmin_;
122   }
123 
qmax(uint8_t qmax)124   inline VMulMicrokernelTester& qmax(uint8_t qmax) {
125     this->qmax_ = qmax;
126     return *this;
127   }
128 
qmax()129   inline uint8_t qmax() const {
130     return this->qmax_;
131   }
132 
iterations(size_t iterations)133   inline VMulMicrokernelTester& iterations(size_t iterations) {
134     this->iterations_ = iterations;
135     return *this;
136   }
137 
iterations()138   inline size_t iterations() const {
139     return this->iterations_;
140   }
141 
Test(xnn_qu8_vmul_minmax_ukernel_function vmul_minmax,xnn_init_qu8_mul_minmax_params_fn init_params,xnn_qu8_requantize_fn requantize)142   void Test(
143       xnn_qu8_vmul_minmax_ukernel_function vmul_minmax,
144       xnn_init_qu8_mul_minmax_params_fn init_params,
145       xnn_qu8_requantize_fn requantize) const
146   {
147     std::random_device random_device;
148     auto rng = std::mt19937(random_device());
149     auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
150 
151     std::vector<uint8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
152     std::vector<uint8_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
153     std::vector<uint8_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
154     std::vector<float> y_fp(batch_size());
155     std::vector<uint8_t> y_ref(batch_size());
156     for (size_t iteration = 0; iteration < iterations(); iteration++) {
157       std::generate(a.begin(), a.end(), std::ref(u8rng));
158       std::generate(b.begin(), b.end(), std::ref(u8rng));
159       if (inplace_a() || inplace_b()) {
160         std::generate(y.begin(), y.end(), std::ref(u8rng));
161       } else {
162         std::fill(y.begin(), y.end(), 0xA5);
163       }
164       const uint8_t* a_data = inplace_a() ? y.data() : a.data();
165       const uint8_t* b_data = inplace_b() ? y.data() : b.data();
166 
167       // Prepare parameters.
168       const float product_scale = a_scale() * b_scale();
169       const float product_output_scale = product_scale / y_scale();
170       xnn_qu8_mul_minmax_params quantization_params;
171       init_params(
172         &quantization_params,
173         a_zero_point(), b_zero_point(), y_zero_point(),
174         product_output_scale, qmin(), qmax());
175 
176       // Compute reference results.
177       for (size_t i = 0; i < batch_size(); i++) {
178         const int32_t acc =
179           (int32_t(a_data[i]) - int32_t(a_zero_point())) * (int32_t(b_data[i]) - int32_t(b_zero_point()));
180         y_fp[i] = float(y_zero_point()) + product_output_scale * float(acc);
181         y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax())));
182         y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin())));
183         y_ref[i] = requantize(
184           acc, product_output_scale, y_zero_point(), qmin(), qmax());
185       }
186 
187       // Call optimized micro-kernel.
188       vmul_minmax(batch_size(), a_data, b_data, y.data(), &quantization_params);
189 
190       // Verify results.
191       for (size_t i = 0; i < batch_size(); i++) {
192         ASSERT_LE(uint32_t(y[i]), uint32_t(qmax()))
193           << "at element " << i << " / " << batch_size();
194         ASSERT_GE(uint32_t(y[i]), uint32_t(qmin()))
195           << "at element " << i << " / " << batch_size();
196         ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
197           << "at element " << i << " / " << batch_size();
198         ASSERT_EQ(uint32_t(y[i]), uint32_t(y_ref[i]))
199           << "at element " << i << " / " << batch_size();
200       }
201     }
202   }
203 
Test(xnn_qs8_vmul_minmax_ukernel_function vmul_minmax,xnn_init_qs8_mul_minmax_params_fn init_params,xnn_qs8_requantize_fn requantize)204   void Test(
205       xnn_qs8_vmul_minmax_ukernel_function vmul_minmax,
206       xnn_init_qs8_mul_minmax_params_fn init_params,
207       xnn_qs8_requantize_fn requantize) const
208   {
209     std::random_device random_device;
210     auto rng = std::mt19937(random_device());
211     auto i8rng = std::bind(
212       std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
213       rng);
214 
215     std::vector<int8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
216     std::vector<int8_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
217     std::vector<int8_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
218     std::vector<float> y_fp(batch_size());
219     std::vector<int8_t> y_ref(batch_size());
220     for (size_t iteration = 0; iteration < iterations(); iteration++) {
221       std::generate(a.begin(), a.end(), std::ref(i8rng));
222       std::generate(b.begin(), b.end(), std::ref(i8rng));
223       if (inplace_a() || inplace_b()) {
224         std::generate(y.begin(), y.end(), std::ref(i8rng));
225       } else {
226         std::fill(y.begin(), y.end(), 0xA5);
227       }
228       const int8_t* a_data = inplace_a() ? y.data() : a.data();
229       const int8_t* b_data = inplace_b() ? y.data() : b.data();
230 
231       // Prepare parameters.
232       const float product_scale = a_scale() * b_scale();
233       const float product_output_scale = product_scale / y_scale();
234       EXPECT_GE(product_output_scale, 0x1.0p-32f);
235       xnn_qs8_mul_minmax_params quantization_params;
236       init_params(
237         &quantization_params,
238         int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80),
239         product_output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
240 
241       // Compute reference results.
242       for (size_t i = 0; i < batch_size(); i++) {
243         const int32_t acc =
244           (int32_t(a_data[i]) - int32_t(a_zero_point() - 0x80)) * (int32_t(b_data[i]) - int32_t(b_zero_point() - 0x80));
245         y_fp[i] = float(y_zero_point() - 0x80) + product_output_scale * float(acc);
246         y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax() - 0x80)));
247         y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin() - 0x80)));
248         y_ref[i] = requantize(
249           acc, product_output_scale, int8_t(y_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
250       }
251 
252       // Call optimized micro-kernel.
253       vmul_minmax(batch_size(), a_data, b_data, y.data(), &quantization_params);
254 
255       // Verify results.
256       for (size_t i = 0; i < batch_size(); i++) {
257         ASSERT_LE(int32_t(y[i]), int32_t(qmax() - 0x80))
258           << "at element " << i << " / " << batch_size();
259         ASSERT_GE(int32_t(y[i]), int32_t(qmin() - 0x80))
260           << "at element " << i << " / " << batch_size();
261         ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
262           << "at element " << i << " / " << batch_size();
263         ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
264           << "at element " << i << " / " << batch_size();
265       }
266     }
267   }
268 
269  private:
270   size_t batch_size_{1};
271   bool inplace_a_{false};
272   bool inplace_b_{false};
273   float a_scale_{0.75f};
274   float b_scale_{1.25f};
275   float y_scale_{0.96875f};
276   uint8_t a_zero_point_{121};
277   uint8_t b_zero_point_{127};
278   uint8_t y_zero_point_{133};
279   uint8_t qmin_{0};
280   uint8_t qmax_{255};
281   size_t iterations_{15};
282 };
283