// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/aligned-allocator.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/math.h>

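// Tester for XNNPACK bilinear-interpolation (IBILINEAR) micro-kernels.
// For each test it builds an indirection buffer of corner pointers, packs
// per-pixel horizontal/vertical blending weights, invokes the micro-kernel
// under test, and checks its output against a scalar reference implementation
// computed in this file. Parameters are set builder-style via the chained
// setters below.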
class IBilinearMicrokernelTester {
 public:
  inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
    assert(pixels >= 1);
    this->pixels_ = pixels;
    return *this;
  }

  inline uint32_t pixels() const {
    return this->pixels_;
  }

  inline IBilinearMicrokernelTester& channels(uint32_t channels) {
    assert(channels >= 1);
    this->channels_ = channels;
    return *this;
  }

  inline uint32_t channels() const {
    return this->channels_;
  }

  inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
    this->input_offset_ = input_offset;
    return *this;
  }

  inline uint32_t input_offset() const {
    return this->input_offset_;
  }

  inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline uint32_t output_stride() const {
    if (this->output_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_stride_ >= channels());
      return this->output_stride_;
    }
  }

  inline IBilinearMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  inline IBilinearMicrokernelTester& input_stride(uint32_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline uint32_t input_stride() const {
    if (this->input_stride_ == 0) {
      return 4 * pixels();
    } else {
      assert(this->input_stride_ >= 4 * pixels());
      return this->input_stride_;
    }
  }

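  // A minimal usage sketch (the kernel symbol below is illustrative; real
  // tests pass a concrete generated micro-kernel):
  //
  //   IBilinearMicrokernelTester()
  //     .pixels(7)
  //     .channels(3)
  //     .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
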
  void Test(xnn_f16_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(0.1f, 1.0f);

    std::vector<const uint16_t*> indirection(pixels() * 4);
    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + indirection.size() * channels());
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> packed_weights(pixels() * 2);
    std::vector<uint16_t> output((pixels() - 1) * output_stride() + channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::generate(packed_weights.begin(), packed_weights.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

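      // Each output pixel reads four corner pointers (top-left, top-right,
      // bottom-left, bottom-right). The pointers are biased by -input_offset()
      // because the micro-kernel adds input_offset back before dereferencing.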
      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
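      // Bilinear blend of the four corners: alpha_h weights left vs. right,
      // alpha_v weights top vs. bottom; the reference runs entirely in fp32.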
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = fp16_ieee_to_fp32_value(packed_weights[i * 2 + 0]);
          const float alpha_v = fp16_ieee_to_fp32_value(packed_weights[i * 2 + 1]);
          output_ref[i * channels() + c] =
            fp16_ieee_to_fp32_value(indirection[i * 4 + 0][c + input_offset()]) * (1.0f - alpha_h) * (1.0f - alpha_v) +
            fp16_ieee_to_fp32_value(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (1.0f - alpha_v) +
            fp16_ieee_to_fp32_value(indirection[i * 4 + 2][c + input_offset()]) * (1.0f - alpha_h) * alpha_v +
            fp16_ieee_to_fp32_value(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(uint16_t),
        reinterpret_cast<const void**>(indirection.data()), input_offset() * sizeof(uint16_t),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(uint16_t));

      // Verify results.
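      // The 1% relative tolerance is loose enough to absorb fp16 rounding
      // (10-bit mantissa) in both the inputs and the kernel's arithmetic.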
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(
              fp16_ieee_to_fp32_value(output[i * output_stride() + c]),
              output_ref[i * channels() + c],
              std::abs(output_ref[i * channels() + c]) * 1.0e-2f)
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

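  // Same flow as the f16 path above, but entirely in fp32 and with a tighter
  // relative tolerance.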
  void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<const float*> indirection(pixels() * 4);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output((pixels() - 1) * output_stride() + channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::generate(packed_weights.begin(), packed_weights.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), nanf(""));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          output_ref[i * channels() + c] =
            indirection[i * 4 + 0][c + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            indirection[i * 4 + 1][c + input_offset()] * alpha_h * (1.0f - alpha_v) +
            indirection[i * 4 + 2][c + input_offset()] * (1.0f - alpha_h) * alpha_v +
            indirection[i * 4 + 3][c + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(float),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(float));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(
              output_ref[i * channels() + c],
              output[i * output_stride() + c],
              std::abs(output_ref[i * channels() + c]) * 1.0e-4)
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void Test(xnn_s8_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
    std::uniform_int_distribution<int16_t> w11dist(0, 2047);

    std::vector<const int8_t*> indirection(pixels() * 4);
    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + indirection.size() * channels());
    std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
    std::vector<int8_t> output((pixels() - 1) * output_stride() + channels());
    std::vector<int8_t> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      std::generate(packed_weights.begin(), packed_weights.end(), [&]() { return w11dist(rng); });
      std::fill(output.begin(), output.end(), INT8_C(0xFA));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
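      // Fixed-point reference: weights are Q11 fixed-point in [0, 2047]
      // (2048 == 2^11 represents 1.0), so each corner product of two weights
      // carries 2^22 scaling; adding 2^21 (2097152) rounds to nearest before
      // the arithmetic shift right by 22.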
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const int32_t alpha_h = packed_weights[i * 2 + 0];
          const int32_t alpha_v = packed_weights[i * 2 + 1];
          const int32_t acc = math_asr_s32(
            int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
            int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v +
            2097152, 22);
          ASSERT_GE(acc, std::numeric_limits<int8_t>::min());
          ASSERT_LE(acc, std::numeric_limits<int8_t>::max());
          output_ref[i * channels() + c] = (int8_t) acc;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(int8_t),
        indirection.data(), input_offset() * sizeof(int8_t),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(int8_t));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(int32_t(output_ref[i * channels() + c]), int32_t(output[i * output_stride() + c]))
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void Test(xnn_u8_ibilinear_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
    std::uniform_int_distribution<int16_t> w11dist(0, 2047);

    std::vector<const uint8_t*> indirection(pixels() * 4);
    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + indirection.size() * channels());
    std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
    std::vector<uint8_t> output((pixels() - 1) * output_stride() + channels());
    std::vector<uint8_t> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::generate(packed_weights.begin(), packed_weights.end(), [&]() { return w11dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xFA));

      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels() - input_offset();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
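      // Same Q11 rounding scheme as the signed path above; every term here is
      // non-negative, so the unsigned shift right by 22 performs the rounding.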
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const uint32_t alpha_h = uint32_t(int32_t(packed_weights[i * 2 + 0]));
          const uint32_t alpha_v = uint32_t(int32_t(packed_weights[i * 2 + 1]));
          const uint32_t acc = (2097152 +
            int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
            int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
            int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v) >> 22;
          ASSERT_LE(acc, std::numeric_limits<uint8_t>::max());
          output_ref[i * channels() + c] = (uint8_t) acc;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels() * sizeof(uint8_t),
        indirection.data(), input_offset() * sizeof(uint8_t),
        packed_weights.data(), output.data(),
        (output_stride() - channels()) * sizeof(uint8_t));

      // Verify results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(uint32_t(output_ref[i * channels() + c]), uint32_t(output[i * output_stride() + c]))
            << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

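  // CHW variants: the input is an NCHW tensor, so the indirection buffer holds
  // only two pointers per output pixel (the top-left and bottom-left corners);
  // each "right" corner is read at left + 1, and input_stride() is the
  // per-channel plane stride in elements.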
  void TestCHW(xnn_f16_ibilinear_chw_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(0.1f, 1.0f);

    std::vector<const uint16_t*> indirection(pixels() * 2);
    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + (channels() - 1) * input_stride() + 4 * pixels());
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> packed_weights(pixels() * 2);
    std::vector<uint16_t> output(pixels() * channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::generate(packed_weights.begin(), packed_weights.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Indirection will point to the even ("left") pixels of the input.
      // The kernels will expect "right" pixels to be placed right next to them.
      for (size_t i = 0; i < indirection.size(); i++) {
        const uint16_t* left_corner = input.data() + 2 * i - input_offset();
        indirection[i] = left_corner;
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = fp16_ieee_to_fp32_value(packed_weights[i * 2 + 0]);
          const float alpha_v = fp16_ieee_to_fp32_value(packed_weights[i * 2 + 1]);
          // `c * pixels() + i` because the output is NCHW.
          output_ref[c * pixels() + i] =
            // `c * input_stride()` selects the channel plane because the input is NCHW.
            fp16_ieee_to_fp32_value((indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()]) * (1.0f - alpha_h) * (1.0f - alpha_v) +
            fp16_ieee_to_fp32_value((indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()]) * alpha_h * (1.0f - alpha_v) +
            fp16_ieee_to_fp32_value((indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()]) * (1.0f - alpha_h) * alpha_v +
            fp16_ieee_to_fp32_value((indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()]) * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
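      // Unlike the NHWC kernels above, channels() is passed as an element
      // count rather than in bytes, and the last argument is the input
      // channel stride in bytes.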
      ibilinear(
        pixels(), channels(),
        reinterpret_cast<const void**>(indirection.data()), input_offset() * sizeof(uint16_t),
        packed_weights.data(), output.data(), input_stride() * sizeof(uint16_t));

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        for (size_t i = 0; i < pixels(); i++) {
          ASSERT_NEAR(
              fp16_ieee_to_fp32_value(output[c * pixels() + i]),
              output_ref[c * pixels() + i],
              std::abs(output_ref[c * pixels() + i]) * 1.0e-2f)
            << "i = " << i << ", channel = " << c;
        }
      }
    }
  }

  void TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<const float*> indirection(pixels() * 2);
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (channels() - 1) * input_stride() + 4 * pixels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
    std::vector<float> output(pixels() * channels());
    std::vector<float> output_ref(pixels() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::generate(packed_weights.begin(), packed_weights.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), nanf(""));

      // Indirection will point to the even ("left") pixels of the input.
      // The kernels will expect "right" pixels to be placed right next to them.
      for (size_t i = 0; i < indirection.size(); i++) {
        const float* left_corner = input.data() + 2 * i - input_offset();
        indirection[i] = left_corner;
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results.
      for (size_t i = 0; i < pixels(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float alpha_h = packed_weights[i * 2 + 0];
          const float alpha_v = packed_weights[i * 2 + 1];
          // `c * pixels() + i` because the output is NCHW.
          output_ref[c * pixels() + i] =
            // `c * input_stride()` selects the channel plane because the input is NCHW.
            (indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
            (indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()] * alpha_h * (1.0f - alpha_v) +
            (indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * alpha_v +
            (indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()] * alpha_h * alpha_v;
        }
      }

      // Call optimized micro-kernel.
      ibilinear(
        pixels(), channels(),
        indirection.data(), input_offset() * sizeof(float),
        packed_weights.data(), output.data(), input_stride() * sizeof(float));

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        for (size_t i = 0; i < pixels(); i++) {
          ASSERT_NEAR(
              output_ref[c * pixels() + i],
              output[c * pixels() + i],
              std::abs(output_ref[c * pixels() + i]) * 1.0e-4)
            << "i = " << i << ", channel = " << c;
        }
      }
    }
  }

 private:
  uint32_t channels_{1};
  uint32_t pixels_{1};
  uint32_t output_stride_{0};
  uint32_t input_stride_{0};
  uint32_t input_offset_{0};
  size_t iterations_{3};
};