// xref: /aosp_15_r20/external/XNNPACK/test/vbinary-microkernel-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>


25 class VBinaryMicrokernelTester {
26  public:
27   enum class OpType {
28     Add,
29     Div,
30     Max,
31     Min,
32     Mul,
33     Sub,
34     SqrDiff,
35   };
36 
batch_size(size_t batch_size)37   inline VBinaryMicrokernelTester& batch_size(size_t batch_size) {
38     assert(batch_size != 0);
39     this->batch_size_ = batch_size;
40     return *this;
41   }
42 
batch_size()43   inline size_t batch_size() const {
44     return this->batch_size_;
45   }
46 
inplace_a(bool inplace_a)47   inline VBinaryMicrokernelTester& inplace_a(bool inplace_a) {
48     this->inplace_a_ = inplace_a;
49     return *this;
50   }
51 
inplace_a()52   inline bool inplace_a() const {
53     return this->inplace_a_;
54   }
55 
inplace_b(bool inplace_b)56   inline VBinaryMicrokernelTester& inplace_b(bool inplace_b) {
57     this->inplace_b_ = inplace_b;
58     return *this;
59   }
60 
inplace_b()61   inline bool inplace_b() const {
62     return this->inplace_b_;
63   }
64 
qmin(uint8_t qmin)65   inline VBinaryMicrokernelTester& qmin(uint8_t qmin) {
66     this->qmin_ = qmin;
67     return *this;
68   }
69 
qmin()70   inline uint8_t qmin() const {
71     return this->qmin_;
72   }
73 
qmax(uint8_t qmax)74   inline VBinaryMicrokernelTester& qmax(uint8_t qmax) {
75     this->qmax_ = qmax;
76     return *this;
77   }
78 
qmax()79   inline uint8_t qmax() const {
80     return this->qmax_;
81   }
82 
iterations(size_t iterations)83   inline VBinaryMicrokernelTester& iterations(size_t iterations) {
84     this->iterations_ = iterations;
85     return *this;
86   }
87 
iterations()88   inline size_t iterations() const {
89     return this->iterations_;
90   }
91 
Test(xnn_f16_vbinary_ukernel_function vbinary,OpType op_type)92   void Test(xnn_f16_vbinary_ukernel_function vbinary, OpType op_type) const {
93     std::random_device random_device;
94     auto rng = std::mt19937(random_device());
95     std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);
96 
97     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
98     std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
99     std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
100     std::vector<float> y_ref(batch_size());
101     for (size_t iteration = 0; iteration < iterations(); iteration++) {
102       std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
103       std::generate(b.begin(), b.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
104       if (inplace_a() || inplace_b()) {
105         std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
106       } else {
107         std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
108       }
109       const uint16_t* a_data = inplace_a() ? y.data() : a.data();
110       const uint16_t* b_data = inplace_b() ? y.data() : b.data();
111 
112       // Compute reference results.
113       for (size_t i = 0; i < batch_size(); i++) {
114         switch (op_type) {
115           case OpType::Add:
116             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
117             break;
118           case OpType::Div:
119             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
120             break;
121           case OpType::Max:
122             y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
123             break;
124           case OpType::Min:
125             y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
126             break;
127           case OpType::Mul:
128             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
129             break;
130           case OpType::SqrDiff:
131           {
132             const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
133             y_ref[i] = diff * diff;
134             break;
135           }
136           case OpType::Sub:
137             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
138             break;
139         }
140       }
141 
142       // Call optimized micro-kernel.
143       vbinary(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), nullptr);
144 
145       // Verify results.
146       for (size_t i = 0; i < batch_size(); i++) {
147         ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
148           << "at " << i << " / " << batch_size();
149       }
150     }
151   }
152 
Test(xnn_f16_vbinary_minmax_ukernel_function vbinary_minmax,OpType op_type,xnn_init_f16_minmax_params_fn init_params)153   void Test(xnn_f16_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const {
154     std::random_device random_device;
155     auto rng = std::mt19937(random_device());
156     std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);
157 
158     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
159     std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
160     std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
161     std::vector<float> y_ref(batch_size());
162     for (size_t iteration = 0; iteration < iterations(); iteration++) {
163       std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
164       std::generate(b.begin(), b.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
165       if (inplace_a() || inplace_b()) {
166         std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
167       } else {
168         std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
169       }
170       const uint16_t* a_data = inplace_a() ? y.data() : a.data();
171       const uint16_t* b_data = inplace_b() ? y.data() : b.data();
172 
173       // Compute reference results.
174       for (size_t i = 0; i < batch_size(); i++) {
175         switch (op_type) {
176           case OpType::Add:
177             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
178             break;
179           case OpType::Div:
180             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
181             break;
182           case OpType::Max:
183             y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
184             break;
185           case OpType::Min:
186             y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
187             break;
188           case OpType::Mul:
189             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
190             break;
191           case OpType::SqrDiff:
192           {
193             const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
194             y_ref[i] = diff * diff;
195             break;
196           }
197           case OpType::Sub:
198             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
199             break;
200         }
201       }
202 
203       const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
204       const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
205       const float accumulated_range = accumulated_max - accumulated_min;
206       const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
207         (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
208         +std::numeric_limits<float>::infinity()));
209       const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
210         (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
211         -std::numeric_limits<float>::infinity()));
212       for (size_t i = 0; i < batch_size(); i++) {
213         y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
214       }
215 
216       // Prepare parameters.
217       xnn_f16_minmax_params params;
218       init_params(&params,
219         fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max));
220 
221       // Call optimized micro-kernel.
222       vbinary_minmax(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), &params);
223 
224       // Verify results.
225       for (size_t i = 0; i < batch_size(); i++) {
226         ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
227           << "at " << i << " / " << batch_size();
228       }
229     }
230   }
231 
232   void Test(xnn_f32_vbinary_ukernel_function vbinary, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const {
233     std::random_device random_device;
234     auto rng = std::mt19937(random_device());
235     std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);
236 
237     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
238     std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
239     std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
240     std::vector<float> y_ref(batch_size());
241     for (size_t iteration = 0; iteration < iterations(); iteration++) {
242       std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
243       std::generate(b.begin(), b.end(), [&]() { return f32dist(rng); });
244       if (inplace_a() || inplace_b()) {
245         std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
246       } else {
247         std::fill(y.begin(), y.end(), nanf(""));
248       }
249       const float* a_data = inplace_a() ? y.data() : a.data();
250       const float* b_data = inplace_b() ? y.data() : b.data();
251 
252       // Compute reference results.
253       for (size_t i = 0; i < batch_size(); i++) {
254         switch (op_type) {
255           case OpType::Add:
256             y_ref[i] = a_data[i] + b_data[i];
257             break;
258           case OpType::Div:
259             y_ref[i] = a_data[i] / b_data[i];
260             break;
261           case OpType::Max:
262             y_ref[i] = std::max<float>(a_data[i], b_data[i]);
263             break;
264           case OpType::Min:
265             y_ref[i] = std::min<float>(a_data[i], b_data[i]);
266             break;
267           case OpType::Mul:
268             y_ref[i] = a_data[i] * b_data[i];
269             break;
270           case OpType::SqrDiff:
271           {
272             const float diff = a_data[i] - b_data[i];
273             y_ref[i] = diff * diff;
274             break;
275           }
276           case OpType::Sub:
277             y_ref[i] = a_data[i] - b_data[i];
278             break;
279         }
280       }
281 
282       // Prepare parameters.
283       xnn_f32_default_params params;
284       if (init_params) {
285         init_params(&params);
286       }
287 
288       // Call optimized micro-kernel.
289       vbinary(batch_size() * sizeof(float), a_data, b_data, y.data(), init_params != nullptr ? &params : nullptr);
290 
291       // Verify results.
292       for (size_t i = 0; i < batch_size(); i++) {
293         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
294           << "at " << i << " / " << batch_size();
295       }
296     }
297   }
298 
Test(xnn_f32_vbinary_relu_ukernel_function vbinary_relu,OpType op_type)299   void Test(xnn_f32_vbinary_relu_ukernel_function vbinary_relu, OpType op_type) const {
300     std::random_device random_device;
301     auto rng = std::mt19937(random_device());
302     std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);
303 
304     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
305     std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
306     std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
307     std::vector<float> y_ref(batch_size());
308     for (size_t iteration = 0; iteration < iterations(); iteration++) {
309       std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
310       std::generate(b.begin(), b.end(), [&]() { return f32dist(rng); });
311       if (inplace_a() || inplace_b()) {
312         std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
313       } else {
314         std::fill(y.begin(), y.end(), nanf(""));
315       }
316       const float* a_data = inplace_a() ? y.data() : a.data();
317       const float* b_data = inplace_b() ? y.data() : b.data();
318 
319       // Compute reference results.
320       for (size_t i = 0; i < batch_size(); i++) {
321         switch (op_type) {
322           case OpType::Add:
323             y_ref[i] = a_data[i] + b_data[i];
324             break;
325           case OpType::Div:
326             y_ref[i] = a_data[i] / b_data[i];
327             break;
328           case OpType::Max:
329             y_ref[i] = std::max<float>(a_data[i], b_data[i]);
330             break;
331           case OpType::Min:
332             y_ref[i] = std::min<float>(a_data[i], b_data[i]);
333             break;
334           case OpType::Mul:
335             y_ref[i] = a_data[i] * b_data[i];
336             break;
337           case OpType::SqrDiff:
338           {
339             const float diff = a_data[i] - b_data[i];
340             y_ref[i] = diff * diff;
341             break;
342           }
343           case OpType::Sub:
344             y_ref[i] = a_data[i] - b_data[i];
345             break;
346         }
347       }
348       for (size_t i = 0; i < batch_size(); i++) {
349         y_ref[i] = std::max(y_ref[i], 0.0f);
350       }
351 
352       // Call optimized micro-kernel.
353       vbinary_relu(batch_size() * sizeof(float), a_data, b_data, y.data(), nullptr);
354 
355       // Verify results.
356       for (size_t i = 0; i < batch_size(); i++) {
357         ASSERT_GE(y[i], 0.0f)
358           << "at " << i << " / " << batch_size();
359         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
360           << "at " << i << " / " << batch_size();
361       }
362     }
363   }
364 
Test(xnn_f32_vbinary_minmax_ukernel_function vbinary_minmax,OpType op_type,xnn_init_f32_minmax_params_fn init_params)365   void Test(xnn_f32_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const {
366     std::random_device random_device;
367     auto rng = std::mt19937(random_device());
368     std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);
369 
370     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
371     std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
372     std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
373     std::vector<float> y_ref(batch_size());
374     for (size_t iteration = 0; iteration < iterations(); iteration++) {
375       std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
376       std::generate(b.begin(), b.end(), [&]() { return f32dist(rng); });
377       if (inplace_a() || inplace_b()) {
378         std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
379       } else {
380         std::fill(y.begin(), y.end(), nanf(""));
381       }
382       const float* a_data = inplace_a() ? y.data() : a.data();
383       const float* b_data = inplace_b() ? y.data() : b.data();
384 
385       // Compute reference results.
386       for (size_t i = 0; i < batch_size(); i++) {
387         switch (op_type) {
388           case OpType::Add:
389             y_ref[i] = a_data[i] + b_data[i];
390             break;
391           case OpType::Div:
392             y_ref[i] = a_data[i] / b_data[i];
393             break;
394           case OpType::Max:
395             y_ref[i] = std::max<float>(a_data[i], b_data[i]);
396             break;
397           case OpType::Min:
398             y_ref[i] = std::min<float>(a_data[i], b_data[i]);
399             break;
400           case OpType::Mul:
401             y_ref[i] = a_data[i] * b_data[i];
402             break;
403           case OpType::SqrDiff:
404           {
405             const float diff = a_data[i] - b_data[i];
406             y_ref[i] = diff * diff;
407             break;
408           }
409           case OpType::Sub:
410             y_ref[i] = a_data[i] - b_data[i];
411             break;
412         }
413       }
414       const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
415       const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
416       const float accumulated_range = accumulated_max - accumulated_min;
417       const float y_max = accumulated_range > 0.0f ?
418         (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
419         +std::numeric_limits<float>::infinity();
420       const float y_min = accumulated_range > 0.0f ?
421         (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
422         -std::numeric_limits<float>::infinity();
423       for (size_t i = 0; i < batch_size(); i++) {
424         y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
425       }
426 
427       // Prepare parameters.
428       xnn_f32_minmax_params params;
429       init_params(&params, y_min, y_max);
430 
431       // Call optimized micro-kernel.
432       vbinary_minmax(batch_size() * sizeof(float), a_data, b_data, y.data(), &params);
433 
434       // Verify results.
435       for (size_t i = 0; i < batch_size(); i++) {
436         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
437           << "at " << i << " / " << batch_size();
438       }
439     }
440   }
441 
442  private:
443   size_t batch_size_{1};
444   bool inplace_a_{false};
445   bool inplace_b_{false};
446   uint8_t qmin_{0};
447   uint8_t qmax_{255};
448   size_t iterations_{15};
449 };
450