xref: /aosp_15_r20/external/XNNPACK/test/qu8-requantization.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <cmath>
10 #include <cstddef>
11 #include <cstdlib>
12 
13 #include <gtest/gtest.h>
14 
15 #include <xnnpack/common.h>
16 #include <xnnpack/isa-checks.h>
17 #include <xnnpack/requantization-stubs.h>
18 #include "requantization-tester.h"
19 
20 
/*
 * Round-to-nearest, ties away from zero, scalar implementation using unsigned 32-bit arithmetic.
 */
24 
// Runs TestExactDivideByPO2 for every shift in [1, 31], with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_UNSIGNED32, exact_divide_by_po2) {
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_rndna__scalar_unsigned32);
  }
}
34 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] combined with
// every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_rndna__scalar_unsigned32);
    }
  }
}
47 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] combined
// with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_rndna__scalar_unsigned32);
    }
  }
}
60 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_rndna__scalar_unsigned32);
    }
  }
}
73 
// Runs TestDivideByPO2WithRoundingTiesAway for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantize_rndna__scalar_unsigned32);
    }
  }
}
86 
// Runs TestSpecialCases with qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_UNSIGNED32, special_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_rndna__scalar_unsigned32);
}
93 
// Runs TestRandomCasesRoundToNearestTiesAway with zero_point(128) and
// iterations(100), qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_UNSIGNED32, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantize_rndna__scalar_unsigned32);
}
102 
103 
/*
 * Round-to-nearest, ties away from zero, scalar implementation using unsigned 64-bit arithmetic.
 */
107 
// Runs TestExactDivideByPO2 for every shift in [1, 31], with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_UNSIGNED64, exact_divide_by_po2) {
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_rndna__scalar_unsigned64);
  }
}
117 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] combined with
// every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_rndna__scalar_unsigned64);
    }
  }
}
130 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] combined
// with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_rndna__scalar_unsigned64);
    }
  }
}
143 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_rndna__scalar_unsigned64);
    }
  }
}
156 
// Runs TestDivideByPO2WithRoundingTiesAway for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantize_rndna__scalar_unsigned64);
    }
  }
}
169 
// Runs TestSpecialCases with qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_UNSIGNED64, special_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_rndna__scalar_unsigned64);
}
176 
// Runs TestRandomCasesRoundToNearestTiesAway with zero_point(128) and
// iterations(100), qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_UNSIGNED64, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantize_rndna__scalar_unsigned64);
}
185 
186 
/*
 * Round-to-nearest, ties away from zero, scalar implementation using signed 64-bit arithmetic.
 */
190 
// Runs TestExactDivideByPO2 for every shift in [1, 31], with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_SIGNED64, exact_divide_by_po2) {
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_rndna__scalar_signed64);
  }
}
200 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] combined with
// every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_rndna__scalar_signed64);
    }
  }
}
213 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] combined
// with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_rndna__scalar_signed64);
    }
  }
}
226 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_rndna__scalar_signed64);
    }
  }
}
239 
// Runs TestDivideByPO2WithRoundingTiesAway for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantize_rndna__scalar_signed64);
    }
  }
}
252 
// Runs TestSpecialCases with qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_SIGNED64, special_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_rndna__scalar_signed64);
}
259 
// Runs TestRandomCasesRoundToNearestTiesAway with zero_point(128) and
// iterations(100), qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SCALAR_SIGNED64, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantize_rndna__scalar_signed64);
}
268 
269 
270 /*
271  * FP32-based scalar implementation using lrintf function.
272  */
273 
// Runs TestRandomCasesApproximate with iterations(1000) and qmin/qmax set to
// the uint8_t limits.
TEST(QU8_FP32__SCALAR_LRINTF, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__scalar_lrintf);
}
281 
282 
283 /*
284  * FP32-based scalar implementation using magic trick for FP32->INT32 conversion.
285  */
286 
// Runs TestRandomCasesApproximate with iterations(1000) and qmin/qmax set to
// the uint8_t limits.
TEST(QU8_FP32__SCALAR_FMAGIC, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__scalar_fmagic);
}
294 
295 
296 /*
297  * GEMMLOWP-equivalent scalar implementation.
298  */
299 
// Runs TestExactDivideByPO2 for every shift in [1, 31], with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_GEMMLOWP__SCALAR, exact_divide_by_po2) {
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__scalar);
  }
}
309 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] combined with
// every shift in [1, 31].
TEST(QU8_GEMMLOWP__SCALAR, exact_divide_by_po2_with_zero_point) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__scalar);
    }
  }
}
322 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] combined
// with every shift in [1, 31].
TEST(QU8_GEMMLOWP__SCALAR, divide_by_po2_with_rounding_up) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_gemmlowp__scalar);
    }
  }
}
335 
336 /* No rounding down test - it fails because of upward bias in multiplication */
337 /* No rounding away test - it fails because of upward bias in multiplication */
338 
// Runs TestSpecialCases with qmin/qmax set to the uint8_t limits.
TEST(QU8_GEMMLOWP__SCALAR, special_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_gemmlowp__scalar);
}
345 
// Runs TestRandomCasesApproximate with iterations(100) and qmin/qmax set to
// the uint8_t limits.
TEST(QU8_GEMMLOWP__SCALAR, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_gemmlowp__scalar);
}
353 
354 
355 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
356   /*
357    * Round-to-nearest, ties away from zero, SSE2 implementation using floating-point shuffle.
358    */
359 
// Runs TestExactDivideByPO2 for every shift in [1, 31], with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_RNDNA__SSE2, exact_divide_by_po2) {
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_rndna__sse2);
  }
}
369 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] combined with
// every shift in [1, 31].
TEST(QU8_RNDNA__SSE2, exact_divide_by_po2_with_zero_point) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_rndna__sse2);
    }
  }
}
382 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] combined
// with every shift in [1, 31].
TEST(QU8_RNDNA__SSE2, divide_by_po2_with_rounding_up) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_rndna__sse2);
    }
  }
}
395 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSE2, divide_by_po2_with_rounding_down) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_rndna__sse2);
    }
  }
}
408 
// Runs TestDivideByPO2WithRoundingTiesAway for every zero point in [0, 255]
// combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSE2, divide_by_po2_with_rounding_away) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantize_rndna__sse2);
    }
  }
}
421 
// Runs TestSpecialCases with qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SSE2, special_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_rndna__sse2);
}
428 
// Runs TestRandomCasesRoundToNearestTiesAway with zero_point(128) and
// iterations(100), qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SSE2, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantize_rndna__sse2);
}
437 
438 
439   /*
440    * Round-to-nearest, ties away from zero, SSSE3 implementation using floating-point shuffle.
441    */
442 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestExactDivideByPO2 for every
// shift in [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SSSE3, exact_divide_by_po2) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_rndna__ssse3);
  }
}
453 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestExactDivideByPO2 for every zero
// point in [1, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSSE3, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_rndna__ssse3);
    }
  }
}
467 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestDivideByPO2WithRoundingUp for
// every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSSE3, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_rndna__ssse3);
    }
  }
}
481 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestDivideByPO2WithRoundingDown for
// every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSSE3, divide_by_po2_with_rounding_down) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_rndna__ssse3);
    }
  }
}
495 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestDivideByPO2WithRoundingTiesAway
// for every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSSE3, divide_by_po2_with_rounding_away) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantize_rndna__ssse3);
    }
  }
}
509 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestSpecialCases with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_RNDNA__SSSE3, special_cases) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_rndna__ssse3);
}
517 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs
// TestRandomCasesRoundToNearestTiesAway with zero_point(128) and
// iterations(100), qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SSSE3, random_cases) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantize_rndna__ssse3);
}
527 
528 
529   /*
530    * Round-to-nearest, ties away from zero, SSE4.1 implementation using static blend instruction.
531    */
532 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestExactDivideByPO2 for every
// shift in [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SSE4, exact_divide_by_po2) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_rndna__sse4);
  }
}
543 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestExactDivideByPO2 for every zero
// point in [1, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSE4, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_rndna__sse4);
    }
  }
}
557 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestDivideByPO2WithRoundingUp for
// every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSE4, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_rndna__sse4);
    }
  }
}
571 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestDivideByPO2WithRoundingDown for
// every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSE4, divide_by_po2_with_rounding_down) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_rndna__sse4);
    }
  }
}
585 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestDivideByPO2WithRoundingTiesAway
// for every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_RNDNA__SSE4, divide_by_po2_with_rounding_away) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantize_rndna__sse4);
    }
  }
}
599 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestSpecialCases with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_RNDNA__SSE4, special_cases) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_rndna__sse4);
}
607 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs
// TestRandomCasesRoundToNearestTiesAway with zero_point(128) and
// iterations(100), qmin/qmax set to the uint8_t limits.
TEST(QU8_RNDNA__SSE4, random_cases) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantize_rndna__sse4);
}
617 
618 
619   /*
620    * FP32-based x86 SSE2 implementation.
621    */
622 
// Runs TestRandomCasesApproximate with iterations(1000) and qmin/qmax set to
// the uint8_t limits.
TEST(QU8_FP32__SSE2, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__sse2);
}
630 
631 
632   /*
633    * GEMMLOWP-equivalent x86 SSE2 implementation.
634    */
635 
// Runs TestExactDivideByPO2 for every shift in [1, 31], with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_GEMMLOWP__SSE2, exact_divide_by_po2) {
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__sse2);
  }
}
645 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] combined with
// every shift in [1, 31].
TEST(QU8_GEMMLOWP__SSE2, exact_divide_by_po2_with_zero_point) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__sse2);
    }
  }
}
658 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] combined
// with every shift in [1, 31].
TEST(QU8_GEMMLOWP__SSE2, divide_by_po2_with_rounding_up) {
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_gemmlowp__sse2);
    }
  }
}
671 
672   /* No rounding down test - it fails because of upward bias in multiplication */
673   /* No rounding away test - it fails because of upward bias in multiplication */
674 
// Runs TestSpecialCases with qmin/qmax set to the uint8_t limits.
TEST(QU8_GEMMLOWP__SSE2, special_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_gemmlowp__sse2);
}
681 
// Runs TestRandomCasesApproximate with iterations(100) and qmin/qmax set to
// the uint8_t limits.
TEST(QU8_GEMMLOWP__SSE2, random_cases) {
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_gemmlowp__sse2);
}
689 
690 
691   /*
692    * GEMMLOWP-equivalent x86 SSSE3 implementation.
693    */
694 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestExactDivideByPO2 for every
// shift in [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_GEMMLOWP__SSSE3, exact_divide_by_po2) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__ssse3);
  }
}
705 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestExactDivideByPO2 for every zero
// point in [1, 255] combined with every shift in [1, 31].
TEST(QU8_GEMMLOWP__SSSE3, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__ssse3);
    }
  }
}
719 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestDivideByPO2WithRoundingUp for
// every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_GEMMLOWP__SSSE3, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_gemmlowp__ssse3);
    }
  }
}
733 
734   /* No rounding down test - it fails because of upward bias in multiplication */
735   /* No rounding away test - it fails because of upward bias in multiplication */
736 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestSpecialCases with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_GEMMLOWP__SSSE3, special_cases) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .TestSpecialCases(xnn_qu8_requantize_gemmlowp__ssse3);
}
744 
// Guarded by TEST_REQUIRES_X86_SSSE3. Runs TestRandomCasesApproximate with
// iterations(100) and qmin/qmax set to the uint8_t limits.
TEST(QU8_GEMMLOWP__SSSE3, random_cases) {
  TEST_REQUIRES_X86_SSSE3;
  using U8 = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8::min())
      .qmax(U8::max())
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_gemmlowp__ssse3);
}
753 
754 
755   /*
756    * GEMMLOWP-equivalent x86 SSE4 implementation.
757    */
758 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestExactDivideByPO2 for every
// shift in [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_GEMMLOWP__SSE4, exact_divide_by_po2) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(U8::min())
        .qmax(U8::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__sse4);
  }
}
769 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestExactDivideByPO2 for every zero
// point in [1, 255] combined with every shift in [1, 31].
TEST(QU8_GEMMLOWP__SSE4, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__sse4);
    }
  }
}
783 
// Guarded by TEST_REQUIRES_X86_SSE41. Runs TestDivideByPO2WithRoundingUp for
// every zero point in [0, 255] combined with every shift in [1, 31].
TEST(QU8_GEMMLOWP__SSE4, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_X86_SSE41;
  using U8 = std::numeric_limits<uint8_t>;
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(U8::min())
          .qmax(U8::max())
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_gemmlowp__sse4);
    }
  }
}
797 
798   /* No rounding down test - it fails because of upward bias in multiplication */
799   /* No rounding away test - it fails because of upward bias in multiplication */
800 
// Passes xnn_qu8_requantize_gemmlowp__sse4 to TestSpecialCases with qmin/qmax
// set to the uint8_t limits. Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_GEMMLOWP__SSE4, special_cases) {
  TEST_REQUIRES_X86_SSE41;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max)
      .TestSpecialCases(xnn_qu8_requantize_gemmlowp__sse4);
}
808 
// Passes xnn_qu8_requantize_gemmlowp__sse4 to TestRandomCasesApproximate with
// qmin/qmax set to the uint8_t limits and iterations set to 100.
// Gated by TEST_REQUIRES_X86_SSE41.
TEST(QU8_GEMMLOWP__SSE4, random_cases) {
  TEST_REQUIRES_X86_SSE41;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max).iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_gemmlowp__sse4);
}
817 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
818 
819 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
820   /*
821    * Round-to-nearest, ties away from zero, ARM NEON implementation.
822    */
823 
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_rndna__neon for every shift
// s in [1, 31], with qmin/qmax set to the uint8_t limits and the tester's zero
// point left untouched. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_RNDNA__NEON, exact_divide_by_po2) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester().qmin(u8_min).qmax(u8_max).s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_rndna__neon);
  }
}
834 
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_rndna__neon for every
// combination of zero point in [1, 255] and shift s in [1, 31], with qmin/qmax
// set to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_RNDNA__NEON, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_rndna__neon);
    }
  }
}
848 
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_rndna__neon for
// every combination of zero point in [0, 255] and shift s in [1, 31], with
// qmin/qmax set to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_RNDNA__NEON, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_rndna__neon);
    }
  }
}
862 
// Runs TestDivideByPO2WithRoundingDown on xnn_qu8_requantize_rndna__neon for
// every combination of zero point in [0, 255] and shift s in [1, 31], with
// qmin/qmax set to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_RNDNA__NEON, divide_by_po2_with_rounding_down) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_rndna__neon);
    }
  }
}
876 
// Runs TestDivideByPO2WithRoundingTiesAway on xnn_qu8_requantize_rndna__neon
// for every combination of zero point in [0, 255] and shift s in [1, 31], with
// qmin/qmax set to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_RNDNA__NEON, divide_by_po2_with_rounding_away) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantize_rndna__neon);
    }
  }
}
890 
// Passes xnn_qu8_requantize_rndna__neon to TestSpecialCases with qmin/qmax set
// to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_RNDNA__NEON, special_cases) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max)
      .TestSpecialCases(xnn_qu8_requantize_rndna__neon);
}
898 
// Passes xnn_qu8_requantize_rndna__neon to
// TestRandomCasesRoundToNearestTiesAway with qmin/qmax set to the uint8_t
// limits, zero point set to 128 and iterations set to 100.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_RNDNA__NEON, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max).zero_point(128).iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantize_rndna__neon);
}
908 
909 
910   /*
911    * FP32-based ARM NEON implementation.
912    */
913 
// Passes xnn_qu8_requantize_fp32__neon to TestRandomCasesApproximate with
// qmin/qmax set to the uint8_t limits and iterations set to 1000.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_FP32__NEON, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max).iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__neon);
}
922 
923 
924   /*
925    * GEMMLOWP-equivalent ARM NEON implementation.
926    */
927 
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_gemmlowp__neon for every
// shift s in [1, 31], with qmin/qmax set to the uint8_t limits and the
// tester's zero point left untouched. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_GEMMLOWP__NEON, exact_divide_by_po2) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester().qmin(u8_min).qmax(u8_max).s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__neon);
  }
}
938 
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_gemmlowp__neon for every
// combination of zero point in [1, 255] and shift s in [1, 31], with qmin/qmax
// set to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_GEMMLOWP__NEON, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__neon);
    }
  }
}
952 
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_gemmlowp__neon for
// every combination of zero point in [0, 255] and shift s in [1, 31], with
// qmin/qmax set to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_GEMMLOWP__NEON, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_gemmlowp__neon);
    }
  }
}
966 
967   /* No rounding down test - it fails because of upward bias in multiplication */
968   /* No rounding away test - it fails because of upward bias in multiplication */
969 
// Passes xnn_qu8_requantize_gemmlowp__neon to TestSpecialCases with qmin/qmax
// set to the uint8_t limits. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_GEMMLOWP__NEON, special_cases) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max)
      .TestSpecialCases(xnn_qu8_requantize_gemmlowp__neon);
}
977 
// Passes xnn_qu8_requantize_gemmlowp__neon to TestRandomCasesApproximate with
// qmin/qmax set to the uint8_t limits and iterations set to 100.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_GEMMLOWP__NEON, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max).iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_gemmlowp__neon);
}
986 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
987 
988 #if XNN_ARCH_WASMSIMD
/*
 * FP32-based WAsm SIMD implementation.
 */
992 
// Passes xnn_qu8_requantize_fp32__wasmsimd to TestRandomCasesApproximate with
// qmin/qmax set to the uint8_t limits and iterations set to 1000.
TEST(QU8_FP32__WASMSIMD, random_cases) {
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max).iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__wasmsimd);
}
1000 
1001 
/*
 * GEMMLOWP-equivalent WAsm SIMD implementation.
 */
1005 
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_gemmlowp__wasmsimd for every
// shift s in [1, 31], with qmin/qmax set to the uint8_t limits and the
// tester's zero point left untouched.
TEST(QU8_GEMMLOWP__WASMSIMD, exact_divide_by_po2) {
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester().qmin(u8_min).qmax(u8_max).s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__wasmsimd);
  }
}
1015 
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_gemmlowp__wasmsimd for every
// combination of zero point in [1, 255] and shift s in [1, 31], with qmin/qmax
// set to the uint8_t limits.
TEST(QU8_GEMMLOWP__WASMSIMD, exact_divide_by_po2_with_zero_point) {
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_gemmlowp__wasmsimd);
    }
  }
}
1028 
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_gemmlowp__wasmsimd
// for every combination of zero point in [0, 255] and shift s in [1, 31], with
// qmin/qmax set to the uint8_t limits.
TEST(QU8_GEMMLOWP__WASMSIMD, divide_by_po2_with_rounding_up) {
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester().zero_point(zp).qmin(u8_min).qmax(u8_max).s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_gemmlowp__wasmsimd);
    }
  }
}
1041 
1042   /* No rounding down test - it fails because of upward bias in multiplication */
1043   /* No rounding away test - it fails because of upward bias in multiplication */
1044 
// Passes xnn_qu8_requantize_gemmlowp__wasmsimd to TestSpecialCases with
// qmin/qmax set to the uint8_t limits.
TEST(QU8_GEMMLOWP__WASMSIMD, special_cases) {
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max)
      .TestSpecialCases(xnn_qu8_requantize_gemmlowp__wasmsimd);
}
1051 
// Passes xnn_qu8_requantize_gemmlowp__wasmsimd to TestRandomCasesApproximate
// with qmin/qmax set to the uint8_t limits and iterations set to 100.
TEST(QU8_GEMMLOWP__WASMSIMD, random_cases) {
  const uint8_t u8_min = std::numeric_limits<uint8_t>::min();
  const uint8_t u8_max = std::numeric_limits<uint8_t>::max();
  RequantizationTester().qmin(u8_min).qmax(u8_max).iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_gemmlowp__wasmsimd);
}
1059 #endif  // XNN_ARCH_WASMSIMD
1060