xref: /aosp_15_r20/external/XNNPACK/test/vaddc-microkernel-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2020 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/requantization.h>
26 
27 
28 class VAddCMicrokernelTester {
29  public:
batch_size(size_t batch_size)30   inline VAddCMicrokernelTester& batch_size(size_t batch_size) {
31     assert(batch_size != 0);
32     this->batch_size_ = batch_size;
33     return *this;
34   }
35 
batch_size()36   inline size_t batch_size() const {
37     return this->batch_size_;
38   }
39 
inplace(bool inplace)40   inline VAddCMicrokernelTester& inplace(bool inplace) {
41     this->inplace_ = inplace;
42     return *this;
43   }
44 
inplace()45   inline bool inplace() const {
46     return this->inplace_;
47   }
48 
a_scale(float a_scale)49   inline VAddCMicrokernelTester& a_scale(float a_scale) {
50     assert(a_scale > 0.0f);
51     assert(std::isnormal(a_scale));
52     this->a_scale_ = a_scale;
53     return *this;
54   }
55 
a_scale()56   inline float a_scale() const {
57     return this->a_scale_;
58   }
59 
a_zero_point(uint8_t a_zero_point)60   inline VAddCMicrokernelTester& a_zero_point(uint8_t a_zero_point) {
61     this->a_zero_point_ = a_zero_point;
62     return *this;
63   }
64 
a_zero_point()65   inline uint8_t a_zero_point() const {
66     return this->a_zero_point_;
67   }
68 
b_scale(float b_scale)69   inline VAddCMicrokernelTester& b_scale(float b_scale) {
70     assert(b_scale > 0.0f);
71     assert(std::isnormal(b_scale));
72     this->b_scale_ = b_scale;
73     return *this;
74   }
75 
b_scale()76   inline float b_scale() const {
77     return this->b_scale_;
78   }
79 
b_zero_point(uint8_t b_zero_point)80   inline VAddCMicrokernelTester& b_zero_point(uint8_t b_zero_point) {
81     this->b_zero_point_ = b_zero_point;
82     return *this;
83   }
84 
b_zero_point()85   inline uint8_t b_zero_point() const {
86     return this->b_zero_point_;
87   }
88 
y_scale(float y_scale)89   inline VAddCMicrokernelTester& y_scale(float y_scale) {
90     assert(y_scale > 0.0f);
91     assert(std::isnormal(y_scale));
92     this->y_scale_ = y_scale;
93     return *this;
94   }
95 
y_scale()96   inline float y_scale() const {
97     return this->y_scale_;
98   }
99 
y_zero_point(uint8_t y_zero_point)100   inline VAddCMicrokernelTester& y_zero_point(uint8_t y_zero_point) {
101     this->y_zero_point_ = y_zero_point;
102     return *this;
103   }
104 
y_zero_point()105   inline uint8_t y_zero_point() const {
106     return this->y_zero_point_;
107   }
108 
qmin(uint8_t qmin)109   inline VAddCMicrokernelTester& qmin(uint8_t qmin) {
110     this->qmin_ = qmin;
111     return *this;
112   }
113 
qmin()114   inline uint8_t qmin() const {
115     return this->qmin_;
116   }
117 
qmax(uint8_t qmax)118   inline VAddCMicrokernelTester& qmax(uint8_t qmax) {
119     this->qmax_ = qmax;
120     return *this;
121   }
122 
qmax()123   inline uint8_t qmax() const {
124     return this->qmax_;
125   }
126 
iterations(size_t iterations)127   inline VAddCMicrokernelTester& iterations(size_t iterations) {
128     this->iterations_ = iterations;
129     return *this;
130   }
131 
iterations()132   inline size_t iterations() const {
133     return this->iterations_;
134   }
135 
Test(xnn_qu8_vadd_minmax_ukernel_function vaddc_minmax,xnn_init_qu8_add_minmax_params_fn init_params)136   void Test(xnn_qu8_vadd_minmax_ukernel_function vaddc_minmax, xnn_init_qu8_add_minmax_params_fn init_params) const {
137     std::random_device random_device;
138     auto rng = std::mt19937(random_device());
139     auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
140 
141     std::vector<uint8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
142     std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
143     std::vector<float> y_fp(batch_size());
144     std::vector<uint8_t> y_ref(batch_size());
145     for (size_t iteration = 0; iteration < iterations(); iteration++) {
146       std::generate(a.begin(), a.end(), std::ref(u8rng));
147       if (inplace()) {
148         std::generate(y.begin(), y.end(), std::ref(u8rng));
149       } else {
150         std::fill(y.begin(), y.end(), 0xA5);
151       }
152       const uint8_t* a_data = inplace() ? y.data() : a.data();
153       const uint8_t b = u8rng();
154 
155       // Prepare parameters.
156       xnn_qu8_add_minmax_params quantization_params;
157       init_params(
158         &quantization_params,
159         a_zero_point(), b_zero_point(), y_zero_point(),
160         a_scale() / y_scale(), b_scale() / y_scale(),
161         qmin(), qmax());
162       xnn_qu8_add_minmax_params scalar_quantization_params;
163       xnn_init_qu8_add_minmax_scalar_params(
164         &scalar_quantization_params,
165         a_zero_point(), b_zero_point(), y_zero_point(),
166         a_scale() / y_scale(), b_scale() / y_scale(),
167         qmin(), qmax());
168 
169       // Compute reference results.
170       for (size_t i = 0; i < batch_size(); i++) {
171         y_fp[i] = float(y_zero_point()) +
172           float(int32_t(a_data[i]) - int32_t(a_zero_point())) * (a_scale() / y_scale()) +
173           float(int32_t(b) - int32_t(b_zero_point())) * (b_scale() / y_scale());
174         y_fp[i] = std::min<float>(y_fp[i], float(qmax()));
175         y_fp[i] = std::max<float>(y_fp[i], float(qmin()));
176         y_ref[i] = xnn_qu8_quantize_add(a_data[i], b, scalar_quantization_params);
177       }
178 
179       // Call optimized micro-kernel.
180       vaddc_minmax(batch_size(), a_data, &b, y.data(), &quantization_params);
181 
182       // Verify results.
183       for (size_t i = 0; i < batch_size(); i++) {
184         ASSERT_LE(uint32_t(y[i]), uint32_t(qmax()))
185           << "at element " << i << " / " << batch_size();
186         ASSERT_GE(uint32_t(y[i]), uint32_t(qmin()))
187           << "at element " << i << " / " << batch_size();
188         ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
189           << "at element " << i << " / " << batch_size();
190         ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
191           << "at element " << i << " / " << batch_size();
192       }
193     }
194   }
195 
Test(xnn_qs8_vadd_minmax_ukernel_function vaddc_minmax,xnn_init_qs8_add_minmax_params_fn init_params)196   void Test(xnn_qs8_vadd_minmax_ukernel_function vaddc_minmax, xnn_init_qs8_add_minmax_params_fn init_params) const {
197     std::random_device random_device;
198     auto rng = std::mt19937(random_device());
199     auto i8rng = std::bind(
200       std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);
201 
202     std::vector<int8_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
203     std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
204     std::vector<float> y_fp(batch_size());
205     std::vector<int8_t> y_ref(batch_size());
206     for (size_t iteration = 0; iteration < iterations(); iteration++) {
207       std::generate(a.begin(), a.end(), std::ref(i8rng));
208       if (inplace()) {
209         std::generate(y.begin(), y.end(), std::ref(i8rng));
210       } else {
211         std::fill(y.begin(), y.end(), 0xA5);
212       }
213       const int8_t* a_data = inplace() ? y.data() : a.data();
214       const int8_t b = i8rng();
215 
216       // Prepare parameters.
217       xnn_qs8_add_minmax_params quantization_params;
218       init_params(
219         &quantization_params,
220         int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80),
221         a_scale() / y_scale(), b_scale() / y_scale(),
222         int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
223       xnn_qs8_add_minmax_params scalar_quantization_params;
224       xnn_init_qs8_add_minmax_scalar_params(
225         &scalar_quantization_params,
226         int8_t(a_zero_point() - 0x80), int8_t(b_zero_point() - 0x80), int8_t(y_zero_point() - 0x80),
227         a_scale() / y_scale(), b_scale() / y_scale(),
228         int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
229 
230       // Compute reference results.
231       for (size_t i = 0; i < batch_size(); i++) {
232         y_fp[i] = float(int32_t(y_zero_point() - 0x80)) +
233           float(int32_t(a_data[i]) - int32_t(a_zero_point() - 0x80)) * (a_scale() / y_scale()) +
234           float(int32_t(b) - int32_t(b_zero_point() - 0x80)) * (b_scale() / y_scale());
235         y_fp[i] = std::min<float>(y_fp[i], float(int32_t(qmax() - 0x80)));
236         y_fp[i] = std::max<float>(y_fp[i], float(int32_t(qmin() - 0x80)));
237         y_ref[i] = xnn_qs8_quantize_add(a_data[i], b, scalar_quantization_params);
238       }
239 
240       // Call optimized micro-kernel.
241       vaddc_minmax(batch_size(), a_data, &b, y.data(), &quantization_params);
242 
243       // Verify results.
244       for (size_t i = 0; i < batch_size(); i++) {
245         ASSERT_LE(int32_t(y[i]), int32_t(qmax() - 0x80))
246           << "at element " << i << " / " << batch_size();
247         ASSERT_GE(int32_t(y[i]), int32_t(qmin() - 0x80))
248           << "at element " << i << " / " << batch_size();
249         ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
250           << "at element " << i << " / " << batch_size();
251         ASSERT_NEAR(float(int32_t(y[i])), y_fp[i], 0.6f)
252           << "at element " << i << " / " << batch_size();
253       }
254     }
255   }
256 
257  private:
258   size_t batch_size_{1};
259   bool inplace_{false};
260   float a_scale_{0.75f};
261   float b_scale_{1.25f};
262   float y_scale_{0.96875f};
263   uint8_t a_zero_point_{121};
264   uint8_t b_zero_point_{127};
265   uint8_t y_zero_point_{133};
266   uint8_t qmin_{0};
267   uint8_t qmax_{255};
268   size_t iterations_{15};
269 };
270