1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2020 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <cmath>
10 #include <cstddef>
11 #include <cstdlib>
12
13 #include <gtest/gtest.h>
14
15 #include <xnnpack/common.h>
16 #include <xnnpack/isa-checks.h>
17 #include <xnnpack/requantization-stubs.h>
18 #include "requantization-tester.h"
19
20
/*
 * Round-to-nearest, ties away from zero, scalar implementation using unsigned 32-bit arithmetic.
 */
24
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndna scalar unsigned32 kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_UNSIGNED32, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndna__scalar_unsigned32);
  }
}
34
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndna scalar unsigned32 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndna__scalar_unsigned32);
    }
  }
}
50
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndna scalar unsigned32 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndna__scalar_unsigned32);
    }
  }
}
66
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndna scalar unsigned32 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndna__scalar_unsigned32);
    }
  }
}
82
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesAway on the rndna scalar unsigned32 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantize_rndna__scalar_unsigned32);
    }
  }
}
98
// Runs TestSpecialCases on the rndna scalar unsigned32 kernel with qmin/qmax
// set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_UNSIGNED32, special_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .TestSpecialCases(xnn_qs8_requantize_rndna__scalar_unsigned32);
}
105
// Runs TestRandomCasesRoundToNearestTiesAway on the rndna scalar unsigned32
// kernel with iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_UNSIGNED32, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantize_rndna__scalar_unsigned32);
}
113
114
/*
 * Round-to-nearest, ties away from zero, scalar implementation using unsigned 64-bit arithmetic.
 */
118
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndna scalar unsigned64 kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_UNSIGNED64, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndna__scalar_unsigned64);
  }
}
128
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndna scalar unsigned64 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndna__scalar_unsigned64);
    }
  }
}
144
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndna scalar unsigned64 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndna__scalar_unsigned64);
    }
  }
}
160
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndna scalar unsigned64 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndna__scalar_unsigned64);
    }
  }
}
176
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesAway on the rndna scalar unsigned64 kernel.
TEST(QS8_RNDNA__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantize_rndna__scalar_unsigned64);
    }
  }
}
192
// Runs TestSpecialCases on the rndna scalar unsigned64 kernel with qmin/qmax
// set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_UNSIGNED64, special_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .TestSpecialCases(xnn_qs8_requantize_rndna__scalar_unsigned64);
}
199
// Runs TestRandomCasesRoundToNearestTiesAway on the rndna scalar unsigned64
// kernel with iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_UNSIGNED64, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantize_rndna__scalar_unsigned64);
}
207
208
/*
 * Round-to-nearest, ties away from zero, scalar implementation using signed 64-bit arithmetic.
 */
212
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndna scalar signed64 kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_SIGNED64, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndna__scalar_signed64);
  }
}
222
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndna scalar signed64 kernel.
TEST(QS8_RNDNA__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndna__scalar_signed64);
    }
  }
}
238
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndna scalar signed64 kernel.
TEST(QS8_RNDNA__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndna__scalar_signed64);
    }
  }
}
254
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndna scalar signed64 kernel.
TEST(QS8_RNDNA__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndna__scalar_signed64);
    }
  }
}
270
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesAway on the rndna scalar signed64 kernel.
TEST(QS8_RNDNA__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantize_rndna__scalar_signed64);
    }
  }
}
286
// Runs TestSpecialCases on the rndna scalar signed64 kernel with qmin/qmax
// set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_SIGNED64, special_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .TestSpecialCases(xnn_qs8_requantize_rndna__scalar_signed64);
}
293
// Runs TestRandomCasesRoundToNearestTiesAway on the rndna scalar signed64
// kernel with iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SCALAR_SIGNED64, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantize_rndna__scalar_signed64);
}
301
302
/*
 * Round-to-nearest, ties up, scalar implementation using signed 64-bit arithmetic.
 */
306
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndnu scalar kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNU__SCALAR, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__scalar);
  }
}
316
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndnu scalar kernel.
TEST(QS8_RNDNU__SCALAR, exact_divide_by_po2_with_zero_point) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__scalar);
    }
  }
}
332
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndnu scalar kernel.
TEST(QS8_RNDNU__SCALAR, divide_by_po2_with_rounding_up) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndnu__scalar);
    }
  }
}
348
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndnu scalar kernel.
TEST(QS8_RNDNU__SCALAR, divide_by_po2_with_rounding_down) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndnu__scalar);
    }
  }
}
364
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesUp on the rndnu scalar kernel.
// NOTE: the test name says "rounding_away", but the tester invoked is the
// ties-up one. The name is kept so existing --gtest_filter patterns still match.
TEST(QS8_RNDNU__SCALAR, divide_by_po2_with_rounding_away) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantize_rndnu__scalar);
    }
  }
}
380
// Runs TestRandomCasesRoundToNearestTiesUp on the rndnu scalar kernel with
// iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNU__SCALAR, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantize_rndnu__scalar);
}
388
389
390 /*
391 * FP32-based scalar implementation using lrintf function.
392 */
393
// Runs TestRandomCasesApproximate on the fp32 scalar lrintf kernel with
// iterations(1000) and qmin/qmax set to the int8_t limits.
TEST(QS8_FP32__SCALAR_LRINTF, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__scalar_lrintf);
}
401
402
403 /*
404 * FP32-based scalar implementation using magic trick for FP32->INT32 conversion.
405 */
406
// Runs TestRandomCasesApproximate on the fp32 scalar fmagic kernel with
// iterations(1000) and qmin/qmax set to the int8_t limits.
TEST(QS8_FP32__SCALAR_FMAGIC, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__scalar_fmagic);
}
414
415
416 /*
417 * GEMMLOWP-equivalent scalar implementation.
418 */
419
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// gemmlowp scalar kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_GEMMLOWP__SCALAR, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__scalar);
  }
}
429
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the gemmlowp scalar kernel.
TEST(QS8_GEMMLOWP__SCALAR, exact_divide_by_po2_with_zero_point) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__scalar);
    }
  }
}
445
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the gemmlowp scalar kernel.
TEST(QS8_GEMMLOWP__SCALAR, divide_by_po2_with_rounding_up) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_gemmlowp__scalar);
    }
  }
}
461
462 /* No rounding down test - it fails because of upward bias in multiplication */
463 /* No rounding away test - it fails because of upward bias in multiplication */
464
// Runs TestSpecialCases on the gemmlowp scalar kernel with qmin/qmax set to
// the int8_t limits.
TEST(QS8_GEMMLOWP__SCALAR, special_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .TestSpecialCases(xnn_qs8_requantize_gemmlowp__scalar);
}
471
// Runs TestRandomCasesApproximate (not an exact-rounding tester) on the
// gemmlowp scalar kernel with iterations(100) and int8_t qmin/qmax.
TEST(QS8_GEMMLOWP__SCALAR, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qs8_requantize_gemmlowp__scalar);
}
479
480
481 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
482 /*
483 * Round-to-nearest, ties away from zero, SSE2 implementation using floating-point shuffle.
484 */
485
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndna SSE2 kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SSE2, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndna__sse2);
  }
}
495
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndna SSE2 kernel.
TEST(QS8_RNDNA__SSE2, exact_divide_by_po2_with_zero_point) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndna__sse2);
    }
  }
}
511
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndna SSE2 kernel.
TEST(QS8_RNDNA__SSE2, divide_by_po2_with_rounding_up) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndna__sse2);
    }
  }
}
527
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndna SSE2 kernel.
TEST(QS8_RNDNA__SSE2, divide_by_po2_with_rounding_down) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndna__sse2);
    }
  }
}
543
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesAway on the rndna SSE2 kernel.
TEST(QS8_RNDNA__SSE2, divide_by_po2_with_rounding_away) {
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantize_rndna__sse2);
    }
  }
}
559
// Runs TestSpecialCases on the rndna SSE2 kernel with qmin/qmax set to the
// int8_t limits.
TEST(QS8_RNDNA__SSE2, special_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .TestSpecialCases(xnn_qs8_requantize_rndna__sse2);
}
566
// Runs TestRandomCasesRoundToNearestTiesAway on the rndna SSE2 kernel with
// iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SSE2, random_cases) {
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantize_rndna__sse2);
}
574
575
576 /*
577 * Round-to-nearest, ties away from zero, SSSE3 implementation using floating-point shuffle.
578 */
579
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndna SSSE3 kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SSSE3, exact_divide_by_po2) {
  // ISA guard; presumably skips the test when the CPU lacks SSSE3.
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndna__ssse3);
  }
}
590
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndna SSSE3 kernel.
TEST(QS8_RNDNA__SSSE3, exact_divide_by_po2_with_zero_point) {
  // ISA guard; presumably skips the test when the CPU lacks SSSE3.
  TEST_REQUIRES_X86_SSSE3;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndna__ssse3);
    }
  }
}
607
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndna SSSE3 kernel.
TEST(QS8_RNDNA__SSSE3, divide_by_po2_with_rounding_up) {
  // ISA guard; presumably skips the test when the CPU lacks SSSE3.
  TEST_REQUIRES_X86_SSSE3;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndna__ssse3);
    }
  }
}
624
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndna SSSE3 kernel.
TEST(QS8_RNDNA__SSSE3, divide_by_po2_with_rounding_down) {
  // ISA guard; presumably skips the test when the CPU lacks SSSE3.
  TEST_REQUIRES_X86_SSSE3;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndna__ssse3);
    }
  }
}
641
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesAway on the rndna SSSE3 kernel.
TEST(QS8_RNDNA__SSSE3, divide_by_po2_with_rounding_away) {
  // ISA guard; presumably skips the test when the CPU lacks SSSE3.
  TEST_REQUIRES_X86_SSSE3;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantize_rndna__ssse3);
    }
  }
}
658
// Runs TestSpecialCases on the rndna SSSE3 kernel with qmin/qmax set to the
// int8_t limits.
TEST(QS8_RNDNA__SSSE3, special_cases) {
  // ISA guard; presumably skips the test when the CPU lacks SSSE3.
  TEST_REQUIRES_X86_SSSE3;
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .TestSpecialCases(xnn_qs8_requantize_rndna__ssse3);
}
666
// Runs TestRandomCasesRoundToNearestTiesAway on the rndna SSSE3 kernel with
// iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SSSE3, random_cases) {
  // ISA guard; presumably skips the test when the CPU lacks SSSE3.
  TEST_REQUIRES_X86_SSSE3;
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantize_rndna__ssse3);
}
675
676
677 /*
678 * Round-to-nearest, ties away from zero, SSE4.1 implementation using static blend instruction.
679 */
680
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndna SSE4 kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SSE4, exact_divide_by_po2) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndna__sse4);
  }
}
691
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndna SSE4 kernel.
TEST(QS8_RNDNA__SSE4, exact_divide_by_po2_with_zero_point) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndna__sse4);
    }
  }
}
708
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndna SSE4 kernel.
TEST(QS8_RNDNA__SSE4, divide_by_po2_with_rounding_up) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndna__sse4);
    }
  }
}
725
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndna SSE4 kernel.
TEST(QS8_RNDNA__SSE4, divide_by_po2_with_rounding_down) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndna__sse4);
    }
  }
}
742
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesAway on the rndna SSE4 kernel.
TEST(QS8_RNDNA__SSE4, divide_by_po2_with_rounding_away) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantize_rndna__sse4);
    }
  }
}
759
// Runs TestSpecialCases on the rndna SSE4 kernel with qmin/qmax set to the
// int8_t limits.
TEST(QS8_RNDNA__SSE4, special_cases) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .TestSpecialCases(xnn_qs8_requantize_rndna__sse4);
}
767
// Runs TestRandomCasesRoundToNearestTiesAway on the rndna SSE4 kernel with
// iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNA__SSE4, random_cases) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantize_rndna__sse4);
}
776
777
778 /*
779 * Round-to-nearest, ties up, SSE4.1 implementation using arithmetic shift right.
780 */
781
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndnu SSE4 SRA kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNU__SSE4_SRA, exact_divide_by_po2) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__sse4_sra);
  }
}
792
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndnu SSE4 SRA kernel.
TEST(QS8_RNDNU__SSE4_SRA, exact_divide_by_po2_with_zero_point) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__sse4_sra);
    }
  }
}
809
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndnu SSE4 SRA kernel.
TEST(QS8_RNDNU__SSE4_SRA, divide_by_po2_with_rounding_up) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndnu__sse4_sra);
    }
  }
}
826
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndnu SSE4 SRA kernel.
TEST(QS8_RNDNU__SSE4_SRA, divide_by_po2_with_rounding_down) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndnu__sse4_sra);
    }
  }
}
843
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingTiesUp on the rndnu SSE4 SRA kernel.
// NOTE: the test name says "rounding_away", but the tester invoked is the
// ties-up one. The name is kept so existing --gtest_filter patterns still match.
TEST(QS8_RNDNU__SSE4_SRA, divide_by_po2_with_rounding_away) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantize_rndnu__sse4_sra);
    }
  }
}
860
// Runs TestRandomCasesRoundToNearestTiesUp on the rndnu SSE4 SRA kernel with
// iterations(100) and qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNU__SSE4_SRA, random_cases) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  RequantizationTester()
      .qmin(std::numeric_limits<int8_t>::min())
      .qmax(std::numeric_limits<int8_t>::max())
      .iterations(100)
      .TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantize_rndnu__sse4_sra);
}
869
870
871 /*
872 * Round-to-nearest, ties up, SSE4.1 implementation using logical shift right.
873 */
874
// For every shift s in [1, 31], runs TestExactDivideByPO2 on the
// rndnu SSE4 SRL kernel with qmin/qmax set to the int8_t limits.
TEST(QS8_RNDNU__SSE4_SRL, exact_divide_by_po2) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__sse4_srl);
  }
}
885
// Sweeps every zero point in the int8_t range and every shift s in [1, 31],
// running TestExactDivideByPO2 on the rndnu SSE4 SRL kernel.
TEST(QS8_RNDNU__SSE4_SRL, exact_divide_by_po2_with_zero_point) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__sse4_srl);
    }
  }
}
902
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingUp on the rndnu SSE4 SRL kernel.
TEST(QS8_RNDNU__SSE4_SRL, divide_by_po2_with_rounding_up) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndnu__sse4_srl);
    }
  }
}
919
// Sweeps every int8_t zero point and every shift s in [1, 31], running
// TestDivideByPO2WithRoundingDown on the rndnu SSE4 SRL kernel.
TEST(QS8_RNDNU__SSE4_SRL, divide_by_po2_with_rounding_down) {
  // ISA guard; presumably skips the test when the CPU lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  // zero_point is int32_t so the loop can step past int8_t max and terminate.
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
          .zero_point(zero_point)
          .qmin(std::numeric_limits<int8_t>::min())
          .qmax(std::numeric_limits<int8_t>::max())
          .s(s)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndnu__sse4_srl);
    }
  }
}
936
// For every zero point in the int8_t range and every s in [1, 31], runs the tie-handling
// check on the SSE4.1 logical-shift-right RNDNU kernel.
// Note: despite the "rounding_away" test name, the check invoked is
// TestDivideByPO2WithRoundingTiesUp.
TEST(QS8_RNDNU__SSE4_SRL, divide_by_po2_with_rounding_away) {
  TEST_REQUIRES_X86_SSE41;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantize_rndnu__sse4_srl);
    }
  }
}
953
// Runs TestRandomCasesRoundToNearestTiesUp (100 iterations) on the SSE4.1
// logical-shift-right RNDNU kernel with qmin/qmax at the int8_t limits.
TEST(QS8_RNDNU__SSE4_SRL, random_cases) {
  TEST_REQUIRES_X86_SSE41;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantize_rndnu__sse4_srl);
}
962
963
964 /*
965 * FP32-based x86 SSE2 implementation.
966 */
967
// Runs TestRandomCasesApproximate (1000 iterations) on the FP32-based SSE2 kernel
// with qmin/qmax at the int8_t limits.
TEST(QS8_FP32__SSE2, random_cases) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__sse2);
}
975
976
977 /*
978 * FP32-based x86 SSE4 implementation.
979 */
980
// Runs TestRandomCasesApproximate (1000 iterations) on the FP32-based SSE4 kernel
// with qmin/qmax at the int8_t limits.
TEST(QS8_FP32__SSE4, random_cases) {
  // The kernel under test is the SSE4 variant, so guard it the same way the other
  // SSE4 suites in this file are guarded; without this check nothing stops the
  // kernel from being executed on an x86 host that lacks SSE4.1.
  TEST_REQUIRES_X86_SSE41;
  RequantizationTester()
    .qmin(std::numeric_limits<int8_t>::min())
    .qmax(std::numeric_limits<int8_t>::max())
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__sse4);
}
988
989
990 /*
991 * GEMMLOWP-equivalent x86 SSE2 implementation.
992 */
993
// For every s in [1, 31], runs TestExactDivideByPO2 on the GEMMLOWP-equivalent SSE2
// kernel with qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_GEMMLOWP__SSE2, exact_divide_by_po2) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(int8_lo)
      .qmax(int8_hi)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__sse2);
  }
}
1003
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the GEMMLOWP-equivalent SSE2 kernel.
TEST(QS8_GEMMLOWP__SSE2, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__sse2);
    }
  }
}
1019
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the GEMMLOWP-equivalent SSE2 kernel.
TEST(QS8_GEMMLOWP__SSE2, divide_by_po2_with_rounding_up) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_gemmlowp__sse2);
    }
  }
}
1035
1036 /* No rounding down test - it fails because of upward bias in multiplication */
1037 /* No rounding away test - it fails because of upward bias in multiplication */
1038
// Runs TestSpecialCases on the GEMMLOWP-equivalent SSE2 kernel with qmin/qmax at
// the int8_t limits.
TEST(QS8_GEMMLOWP__SSE2, special_cases) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .TestSpecialCases(xnn_qs8_requantize_gemmlowp__sse2);
}
1045
// Runs TestRandomCasesApproximate (100 iterations) on the GEMMLOWP-equivalent SSE2
// kernel with qmin/qmax at the int8_t limits.
TEST(QS8_GEMMLOWP__SSE2, random_cases) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_gemmlowp__sse2);
}
1053
1054
1055 /*
1056 * GEMMLOWP-equivalent x86 SSSE3 implementation.
1057 */
1058
// For every s in [1, 31], runs TestExactDivideByPO2 on the GEMMLOWP-equivalent SSSE3
// kernel with qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_GEMMLOWP__SSSE3, exact_divide_by_po2) {
  TEST_REQUIRES_X86_SSSE3;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(int8_lo)
      .qmax(int8_hi)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__ssse3);
  }
}
1069
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the GEMMLOWP-equivalent SSSE3 kernel.
TEST(QS8_GEMMLOWP__SSSE3, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_X86_SSSE3;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__ssse3);
    }
  }
}
1086
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the GEMMLOWP-equivalent SSSE3 kernel.
TEST(QS8_GEMMLOWP__SSSE3, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_X86_SSSE3;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_gemmlowp__ssse3);
    }
  }
}
1103
1104 /* No rounding down test - it fails because of upward bias in multiplication */
1105 /* No rounding away test - it fails because of upward bias in multiplication */
1106
// Runs TestSpecialCases on the GEMMLOWP-equivalent SSSE3 kernel with qmin/qmax at
// the int8_t limits.
TEST(QS8_GEMMLOWP__SSSE3, special_cases) {
  TEST_REQUIRES_X86_SSSE3;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .TestSpecialCases(xnn_qs8_requantize_gemmlowp__ssse3);
}
1114
// Runs TestRandomCasesApproximate (100 iterations) on the GEMMLOWP-equivalent SSSE3
// kernel with qmin/qmax at the int8_t limits.
TEST(QS8_GEMMLOWP__SSSE3, random_cases) {
  TEST_REQUIRES_X86_SSSE3;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_gemmlowp__ssse3);
}
1123
1124
1125 /*
1126 * GEMMLOWP-equivalent x86 SSE4 implementation.
1127 */
1128
// For every s in [1, 31], runs TestExactDivideByPO2 on the GEMMLOWP-equivalent SSE4
// kernel with qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_GEMMLOWP__SSE4, exact_divide_by_po2) {
  TEST_REQUIRES_X86_SSE41;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(int8_lo)
      .qmax(int8_hi)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__sse4);
  }
}
1139
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the GEMMLOWP-equivalent SSE4 kernel.
TEST(QS8_GEMMLOWP__SSE4, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__sse4);
    }
  }
}
1156
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the GEMMLOWP-equivalent SSE4 kernel.
TEST(QS8_GEMMLOWP__SSE4, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_X86_SSE41;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_gemmlowp__sse4);
    }
  }
}
1173
1174 /* No rounding down test - it fails because of upward bias in multiplication */
1175 /* No rounding away test - it fails because of upward bias in multiplication */
1176
// Runs TestSpecialCases on the GEMMLOWP-equivalent SSE4 kernel with qmin/qmax at
// the int8_t limits.
TEST(QS8_GEMMLOWP__SSE4, special_cases) {
  TEST_REQUIRES_X86_SSE41;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .TestSpecialCases(xnn_qs8_requantize_gemmlowp__sse4);
}
1184
// Runs TestRandomCasesApproximate (100 iterations) on the GEMMLOWP-equivalent SSE4
// kernel with qmin/qmax at the int8_t limits.
TEST(QS8_GEMMLOWP__SSE4, random_cases) {
  TEST_REQUIRES_X86_SSE41;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_gemmlowp__sse4);
}
1193 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1194
1195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
1196 /*
1197 * Round-to-nearest, ties away from zero, ARM NEON implementation.
1198 */
1199
// For every s in [1, 31], runs TestExactDivideByPO2 on the NEON RNDNA kernel with
// qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_RNDNA__NEON, exact_divide_by_po2) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    // Setter order (s first) is kept exactly as in the original chain.
    RequantizationTester()
      .s(shift)
      .qmin(int8_lo)
      .qmax(int8_hi)
      .TestExactDivideByPO2(xnn_qs8_requantize_rndna__neon);
  }
}
1210
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the NEON RNDNA kernel.
TEST(QS8_RNDNA__NEON, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndna__neon);
    }
  }
}
1227
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the NEON RNDNA kernel.
TEST(QS8_RNDNA__NEON, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndna__neon);
    }
  }
}
1244
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingDown on the NEON RNDNA kernel.
TEST(QS8_RNDNA__NEON, divide_by_po2_with_rounding_down) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndna__neon);
    }
  }
}
1261
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingTiesAway on the NEON RNDNA kernel.
TEST(QS8_RNDNA__NEON, divide_by_po2_with_rounding_away) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantize_rndna__neon);
    }
  }
}
1278
// Runs TestSpecialCases on the NEON RNDNA kernel with qmin/qmax at the int8_t limits.
TEST(QS8_RNDNA__NEON, special_cases) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .TestSpecialCases(xnn_qs8_requantize_rndna__neon);
}
1286
// Runs TestRandomCasesRoundToNearestTiesAway (100 iterations) on the NEON RNDNA
// kernel with qmin/qmax at the int8_t limits.
TEST(QS8_RNDNA__NEON, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantize_rndna__neon);
}
1295
1296
1297 /*
1298 * Round-to-nearest, ties up, ARM NEON implementation using extended multiplication.
1299 */
1300
// For every s in [1, 31], runs TestExactDivideByPO2 on the NEON extended-multiplication
// RNDNU kernel with qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_RNDNU__NEON_MULL, exact_divide_by_po2) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(int8_lo)
      .qmax(int8_hi)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__neon_mull);
  }
}
1311
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the NEON extended-multiplication RNDNU kernel.
TEST(QS8_RNDNU__NEON_MULL, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__neon_mull);
    }
  }
}
1328
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the NEON extended-multiplication RNDNU kernel.
TEST(QS8_RNDNU__NEON_MULL, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndnu__neon_mull);
    }
  }
}
1345
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingDown on the NEON extended-multiplication RNDNU kernel.
TEST(QS8_RNDNU__NEON_MULL, divide_by_po2_with_rounding_down) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndnu__neon_mull);
    }
  }
}
1362
// For every zero point in the int8_t range and every s in [1, 31], runs the tie-handling
// check on the NEON extended-multiplication RNDNU kernel.
// Note: despite the "rounding_away" test name, the check invoked is
// TestDivideByPO2WithRoundingTiesUp.
TEST(QS8_RNDNU__NEON_MULL, divide_by_po2_with_rounding_away) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantize_rndnu__neon_mull);
    }
  }
}
1379
// Runs TestRandomCasesRoundToNearestTiesUp (100 iterations) on the NEON
// extended-multiplication RNDNU kernel with qmin/qmax at the int8_t limits.
TEST(QS8_RNDNU__NEON_MULL, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantize_rndnu__neon_mull);
}
1388
1389
1390 /*
1391 * Round-to-nearest, ties up, ARM NEON implementation using Q31 multiplication.
1392 */
1393
// For every s in [1, 31], runs TestExactDivideByPO2 on the NEON Q31-multiplication
// RNDNU kernel with qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_RNDNU__NEON_QDMULH, exact_divide_by_po2) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(int8_lo)
      .qmax(int8_hi)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__neon_qdmulh);
  }
}
1404
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the NEON Q31-multiplication RNDNU kernel.
TEST(QS8_RNDNU__NEON_QDMULH, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_rndnu__neon_qdmulh);
    }
  }
}
1421
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the NEON Q31-multiplication RNDNU kernel.
TEST(QS8_RNDNU__NEON_QDMULH, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_rndnu__neon_qdmulh);
    }
  }
}
1438
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingDown on the NEON Q31-multiplication RNDNU kernel.
TEST(QS8_RNDNU__NEON_QDMULH, divide_by_po2_with_rounding_down) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_rndnu__neon_qdmulh);
    }
  }
}
1455
// For every zero point in the int8_t range and every s in [1, 31], runs the tie-handling
// check on the NEON Q31-multiplication RNDNU kernel.
// Note: despite the "rounding_away" test name, the check invoked is
// TestDivideByPO2WithRoundingTiesUp.
TEST(QS8_RNDNU__NEON_QDMULH, divide_by_po2_with_rounding_away) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantize_rndnu__neon_qdmulh);
    }
  }
}
1472
// Runs TestRandomCasesRoundToNearestTiesUp (100 iterations) on the NEON
// Q31-multiplication RNDNU kernel with qmin/qmax at the int8_t limits.
TEST(QS8_RNDNU__NEON_QDMULH, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantize_rndnu__neon_qdmulh);
}
1481
1482
1483 /*
1484 * FP32-based ARM NEON implementation.
1485 */
1486
// Runs TestRandomCasesApproximate (1000 iterations) on the FP32-based NEON kernel
// with qmin/qmax at the int8_t limits.
TEST(QS8_FP32__NEON, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__neon);
}
1495
1496
1497 /*
1498 * GEMMLOWP-equivalent ARM NEON implementation.
1499 */
1500
// For every s in [1, 31], runs TestExactDivideByPO2 on the GEMMLOWP-equivalent NEON
// kernel with qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_GEMMLOWP__NEON, exact_divide_by_po2) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(int8_lo)
      .qmax(int8_hi)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__neon);
  }
}
1511
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the GEMMLOWP-equivalent NEON kernel.
TEST(QS8_GEMMLOWP__NEON, exact_divide_by_po2_with_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__neon);
    }
  }
}
1528
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the GEMMLOWP-equivalent NEON kernel.
TEST(QS8_GEMMLOWP__NEON, divide_by_po2_with_rounding_up) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_gemmlowp__neon);
    }
  }
}
1545
1546 /* No rounding down test - it fails because of upward bias in multiplication */
1547 /* No rounding away test - it fails because of upward bias in multiplication */
1548
// Runs TestSpecialCases on the GEMMLOWP-equivalent NEON kernel with qmin/qmax at
// the int8_t limits.
TEST(QS8_GEMMLOWP__NEON, special_cases) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .TestSpecialCases(xnn_qs8_requantize_gemmlowp__neon);
}
1556
// Runs TestRandomCasesApproximate (100 iterations) on the GEMMLOWP-equivalent NEON
// kernel with qmin/qmax at the int8_t limits.
TEST(QS8_GEMMLOWP__NEON, random_cases) {
  TEST_REQUIRES_ARM_NEON;
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_gemmlowp__neon);
}
1565 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1566
1567 #if XNN_ARCH_WASMSIMD
1568 /*
1569 * FP32-based WAsm SIMD implementation.
1570 */
1571
// Runs TestRandomCasesApproximate (1000 iterations) on the FP32-based WAsm SIMD
// kernel with qmin/qmax at the int8_t limits.
TEST(QS8_FP32__WASMSIMD, random_cases) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__wasmsimd);
}
1579
1580 /*
1581 * GEMMLOWP-equivalent WAsm SIMD implementation.
1582 */
1583
// For every s in [1, 31], runs TestExactDivideByPO2 on the GEMMLOWP-equivalent WAsm SIMD
// kernel with qmin/qmax at the int8_t limits (zero point is not set here).
TEST(QS8_GEMMLOWP__WASMSIMD, exact_divide_by_po2) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(int8_lo)
      .qmax(int8_hi)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__wasmsimd);
  }
}
1593
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestExactDivideByPO2 on the GEMMLOWP-equivalent WAsm SIMD kernel.
TEST(QS8_GEMMLOWP__WASMSIMD, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_gemmlowp__wasmsimd);
    }
  }
}
1609
// For every zero point in the int8_t range and every s in [1, 31], runs
// TestDivideByPO2WithRoundingUp on the GEMMLOWP-equivalent WAsm SIMD kernel.
TEST(QS8_GEMMLOWP__WASMSIMD, divide_by_po2_with_rounding_up) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  for (int32_t zp = int8_lo; zp <= int8_hi; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(int8_lo)
        .qmax(int8_hi)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_gemmlowp__wasmsimd);
    }
  }
}
1625
1626 /* No rounding down test - it fails because of upward bias in multiplication */
1627 /* No rounding away test - it fails because of upward bias in multiplication */
1628
// Runs TestSpecialCases on the GEMMLOWP-equivalent WAsm SIMD kernel with qmin/qmax
// at the int8_t limits.
TEST(QS8_GEMMLOWP__WASMSIMD, special_cases) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .TestSpecialCases(xnn_qs8_requantize_gemmlowp__wasmsimd);
}
1635
// Runs TestRandomCasesApproximate (100 iterations) on the GEMMLOWP-equivalent WAsm SIMD
// kernel with qmin/qmax at the int8_t limits.
TEST(QS8_GEMMLOWP__WASMSIMD, random_cases) {
  constexpr int8_t int8_lo = std::numeric_limits<int8_t>::min();
  constexpr int8_t int8_hi = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(int8_lo)
    .qmax(int8_hi)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_gemmlowp__wasmsimd);
}
1643 #endif // XNN_ARCH_WASMSIMD
1644