xref: /aosp_15_r20/external/XNNPACK/test/qs8-vadd-minmax.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/qs8-vadd-minmax.yaml
8 //   Generator: tools/generate-vbinary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vadd.h>
18 #include "vadd-microkernel-tester.h"
19 
20 
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size 8.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
}
28 
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
37 
// Sweeps every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
46 
// Sweeps every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
55 
// Sweeps batch sizes 1, 8, ..., 36 with the tester's inplace_a option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
65 
// Sweeps batch sizes 1, 8, ..., 36 with the tester's inplace_b option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
75 
// Sweeps batch sizes 1, 8, ..., 36 with both inplace_a and inplace_b set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
86 
// For batch sizes 1, 8, ..., 36, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
98 
// For batch sizes 1, 8, ..., 36, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
110 
// For batch sizes 1, 8, ..., 36, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
122 
// For batch sizes 1, 8, ..., 36, sweeps a_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
134 
// For batch sizes 1, 8, ..., 36, sweeps b_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
146 
// For batch sizes 1, 8, ..., 36, sweeps y_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
158 
// Sweeps batch sizes 1, 8, ..., 36 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
168 
// Sweeps batch sizes 1, 8, ..., 36 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
178 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
179 
180 
181 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size 16.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
}
188 
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
197 
// Sweeps every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
206 
// Sweeps every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
215 
// Sweeps batch sizes 1, 16, ..., 76 with the tester's inplace_a option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
225 
// Sweeps batch sizes 1, 16, ..., 76 with the tester's inplace_b option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
235 
// Sweeps batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
246 
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
258 
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
270 
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
282 
// For batch sizes 1, 16, ..., 76, sweeps a_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
294 
// For batch sizes 1, 16, ..., 76, sweeps b_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
306 
// For batch sizes 1, 16, ..., 76, sweeps y_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
318 
// Sweeps batch sizes 1, 16, ..., 76 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
328 
// Sweeps batch sizes 1, 16, ..., 76 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
338 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
339 
340 
341 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size 24.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester()
    .batch_size(24)
    .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
}
348 
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
357 
// Sweeps every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 24; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
366 
// Sweeps every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 25; batch_size < 48; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
375 
// Sweeps batch sizes 1, 24, ..., 116 with the tester's inplace_a option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
385 
// Sweeps batch sizes 1, 24, ..., 116 with the tester's inplace_b option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
395 
// Sweeps batch sizes 1, 24, ..., 116 with both inplace_a and inplace_b set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
406 
// For batch sizes 1, 24, ..., 116, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
418 
// For batch sizes 1, 24, ..., 116, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
430 
// For batch sizes 1, 24, ..., 116, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
442 
// For batch sizes 1, 24, ..., 116, sweeps a_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
454 
// For batch sizes 1, 24, ..., 116, sweeps b_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
466 
// For batch sizes 1, 24, ..., 116, sweeps y_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
478 
// Sweeps batch sizes 1, 24, ..., 116 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
488 
// Sweeps batch sizes 1, 24, ..., 116 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
498 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
499 
500 
501 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size 32.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester()
    .batch_size(32)
    .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
}
508 
// Sweeps batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
517 
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
526 
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 33; batch_size < 64; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
535 
// Sweeps batch sizes 1, 32, ..., 156 with the tester's inplace_a option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
545 
// Sweeps batch sizes 1, 32, ..., 156 with the tester's inplace_b option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
555 
// Sweeps batch sizes 1, 32, ..., 156 with both inplace_a and inplace_b set to true.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
566 
// For batch sizes 1, 32, ..., 156, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
578 
// For batch sizes 1, 32, ..., 156, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
590 
// For batch sizes 1, 32, ..., 156, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
602 
// For batch sizes 1, 32, ..., 156, sweeps a_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
614 
// For batch sizes 1, 32, ..., 156, sweeps b_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
626 
// For batch sizes 1, 32, ..., 156, sweeps y_scale geometrically: starts at 0.1f and
// multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
638 
// Sweeps batch sizes 1, 32, ..., 156 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
648 
// Sweeps batch sizes 1, 32, ..., 156 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
658 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
659 
660 
661 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size 16.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
}
668 
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
677 
// Sweeps every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
686 
// Sweeps every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
695 
// Sweeps batch sizes 1, 16, ..., 76 with the tester's inplace_a option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
705 
// Sweeps batch sizes 1, 16, ..., 76 with the tester's inplace_b option set to true.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
715 
// Sweeps batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b set to true.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
726 
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
738 
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
750 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 on the NEON ld128 x16 kernel.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
762 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
774 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
786 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
798 
// Runs the NEON ld128 x16 kernel with qmin(128) set on the tester,
// at batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
808 
// Runs the NEON ld128 x16 kernel with qmax(128) set on the tester,
// at batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
818 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
819 
820 
821 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the NEON ld128 x32 kernel at batch size 32.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
}
828 
// Runs the NEON ld128 x32 kernel at batch sizes 64, 96, ..., 288 (step 32).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
837 
// Runs the NEON ld128 x32 kernel at every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
846 
// Runs the NEON ld128 x32 kernel at every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
855 
// Runs the NEON ld128 x32 kernel with inplace_a(true) set on the tester,
// at batch sizes 1, 32, 63, 94, 125 and 156.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
865 
// Runs the NEON ld128 x32 kernel with inplace_b(true) set on the tester,
// at batch sizes 1, 32, 63, 94, 125 and 156.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
875 
// Runs the NEON ld128 x32 kernel with both inplace_a(true) and inplace_b(true)
// set on the tester, at batch sizes 1, 32, 63, 94, 125 and 156.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
886 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 on the NEON ld128 x32 kernel.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
898 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 on the NEON ld128 x32 kernel.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
910 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 on the NEON ld128 x32 kernel.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
922 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
934 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
946 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
958 
// Runs the NEON ld128 x32 kernel with qmin(128) set on the tester,
// at batch sizes 1, 32, 63, 94, 125 and 156.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
968 
// Runs the NEON ld128 x32 kernel with qmax(128) set on the tester,
// at batch sizes 1, 32, 63, 94, 125 and 156.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
978 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
979 
980 
981 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the SSE2 mul16 ld64 x8 kernel at batch size 8.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  VAddMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
}
988 
// Runs the SSE2 mul16 ld64 x8 kernel at batch sizes 16, 24, ..., 72 (step 8).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
997 
// Runs the SSE2 mul16 ld64 x8 kernel at every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1006 
// Runs the SSE2 mul16 ld64 x8 kernel at every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1015 
// Runs the SSE2 mul16 ld64 x8 kernel with inplace_a(true) set on the tester,
// at batch sizes 1, 8, 15, 22, 29 and 36.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1025 
// Runs the SSE2 mul16 ld64 x8 kernel with inplace_b(true) set on the tester,
// at batch sizes 1, 8, 15, 22, 29 and 36.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1035 
// Runs the SSE2 mul16 ld64 x8 kernel with both inplace_a(true) and
// inplace_b(true) set on the tester, at batch sizes 1, 8, 15, 22, 29 and 36.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1046 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x8 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1058 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x8 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1070 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x8 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1082 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1094 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1106 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1118 
// Runs the SSE2 mul16 ld64 x8 kernel with qmin(128) set on the tester,
// at batch sizes 1, 8, 15, 22, 29 and 36.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1128 
// Runs the SSE2 mul16 ld64 x8 kernel with qmax(128) set on the tester,
// at batch sizes 1, 8, 15, 22, 29 and 36.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1138 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139 
1140 
1141 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the SSE2 mul16 ld64 x16 kernel at batch size 16.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  VAddMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
}
1148 
// Runs the SSE2 mul16 ld64 x16 kernel at batch sizes 32, 48, ..., 144 (step 16).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1157 
// Runs the SSE2 mul16 ld64 x16 kernel at every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1166 
// Runs the SSE2 mul16 ld64 x16 kernel at every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1175 
// Runs the SSE2 mul16 ld64 x16 kernel with inplace_a(true) set on the tester,
// at batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1185 
// Runs the SSE2 mul16 ld64 x16 kernel with inplace_b(true) set on the tester,
// at batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1195 
// Runs the SSE2 mul16 ld64 x16 kernel with both inplace_a(true) and
// inplace_b(true) set on the tester, at batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1206 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x16 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1218 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x16 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1230 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x16 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1242 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1254 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1266 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1278 
// Runs the SSE2 mul16 ld64 x16 kernel with qmin(128) set on the tester,
// at batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1288 
// Runs the SSE2 mul16 ld64 x16 kernel with qmax(128) set on the tester,
// at batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1298 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299 
1300 
1301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the SSE2 mul16 ld64 x24 kernel at batch size 24.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  VAddMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
}
1308 
// Runs the SSE2 mul16 ld64 x24 kernel at batch sizes 48, 72, ..., 216 (step 24).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1317 
// Runs the SSE2 mul16 ld64 x24 kernel at every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1326 
// Runs the SSE2 mul16 ld64 x24 kernel at every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1335 
// Runs the SSE2 mul16 ld64 x24 kernel with inplace_a(true) set on the tester,
// at batch sizes 1, 24, 47, 70, 93 and 116.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1345 
// Runs the SSE2 mul16 ld64 x24 kernel with inplace_b(true) set on the tester,
// at batch sizes 1, 24, 47, 70, 93 and 116.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1355 
// Runs the SSE2 mul16 ld64 x24 kernel with both inplace_a(true) and
// inplace_b(true) set on the tester, at batch sizes 1, 24, 47, 70, 93 and 116.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1366 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x24 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1378 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x24 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1390 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 on the SSE2 mul16 ld64 x24 kernel.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1402 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1414 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1426 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester;
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1438 
// Runs the SSE2 mul16 ld64 x24 kernel with qmin(128) set on the tester,
// at batch sizes 1, 24, 47, 70, 93 and 116.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester;
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1448 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24,qmax)1449   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
1450     TEST_REQUIRES_X86_SSE2;
1451     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1452       VAddMicrokernelTester()
1453         .batch_size(batch_size)
1454         .qmax(128)
1455         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
1456     }
1457   }
1458 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459 
1460 
1461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,batch_eq_32)1462   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
1463     TEST_REQUIRES_X86_SSE2;
1464     VAddMicrokernelTester()
1465       .batch_size(32)
1466       .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1467   }
1468 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,batch_div_32)1469   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
1470     TEST_REQUIRES_X86_SSE2;
1471     for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
1472       VAddMicrokernelTester()
1473         .batch_size(batch_size)
1474         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1475     }
1476   }
1477 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,batch_lt_32)1478   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
1479     TEST_REQUIRES_X86_SSE2;
1480     for (size_t batch_size = 1; batch_size < 32; batch_size++) {
1481       VAddMicrokernelTester()
1482         .batch_size(batch_size)
1483         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1484     }
1485   }
1486 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,batch_gt_32)1487   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
1488     TEST_REQUIRES_X86_SSE2;
1489     for (size_t batch_size = 33; batch_size < 64; batch_size++) {
1490       VAddMicrokernelTester()
1491         .batch_size(batch_size)
1492         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1493     }
1494   }
1495 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,inplace_a)1496   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a) {
1497     TEST_REQUIRES_X86_SSE2;
1498     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1499       VAddMicrokernelTester()
1500         .batch_size(batch_size)
1501         .inplace_a(true)
1502         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1503     }
1504   }
1505 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,inplace_b)1506   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_b) {
1507     TEST_REQUIRES_X86_SSE2;
1508     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1509       VAddMicrokernelTester()
1510         .batch_size(batch_size)
1511         .inplace_b(true)
1512         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1513     }
1514   }
1515 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,inplace_a_and_b)1516   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a_and_b) {
1517     TEST_REQUIRES_X86_SSE2;
1518     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1519       VAddMicrokernelTester()
1520         .batch_size(batch_size)
1521         .inplace_a(true)
1522         .inplace_b(true)
1523         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1524     }
1525   }
1526 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,a_zero_point)1527   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
1528     TEST_REQUIRES_X86_SSE2;
1529     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1530       for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1531         VAddMicrokernelTester()
1532           .batch_size(batch_size)
1533           .a_zero_point(a_zero_point)
1534           .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1535       }
1536     }
1537   }
1538 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,b_zero_point)1539   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
1540     TEST_REQUIRES_X86_SSE2;
1541     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1542       for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1543         VAddMicrokernelTester()
1544           .batch_size(batch_size)
1545           .b_zero_point(b_zero_point)
1546           .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1547       }
1548     }
1549   }
1550 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,y_zero_point)1551   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
1552     TEST_REQUIRES_X86_SSE2;
1553     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1554       for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1555         VAddMicrokernelTester()
1556           .batch_size(batch_size)
1557           .y_zero_point(y_zero_point)
1558           .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1559       }
1560     }
1561   }
1562 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,a_scale)1563   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
1564     TEST_REQUIRES_X86_SSE2;
1565     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1566       for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1567         VAddMicrokernelTester()
1568           .batch_size(batch_size)
1569           .a_scale(a_scale)
1570           .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1571       }
1572     }
1573   }
1574 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,b_scale)1575   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
1576     TEST_REQUIRES_X86_SSE2;
1577     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1578       for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1579         VAddMicrokernelTester()
1580           .batch_size(batch_size)
1581           .b_scale(b_scale)
1582           .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1583       }
1584     }
1585   }
1586 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,y_scale)1587   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
1588     TEST_REQUIRES_X86_SSE2;
1589     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1590       for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1591         VAddMicrokernelTester()
1592           .batch_size(batch_size)
1593           .y_scale(y_scale)
1594           .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1595       }
1596     }
1597   }
1598 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,qmin)1599   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
1600     TEST_REQUIRES_X86_SSE2;
1601     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1602       VAddMicrokernelTester()
1603         .batch_size(batch_size)
1604         .qmin(128)
1605         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1606     }
1607   }
1608 
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32,qmax)1609   TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
1610     TEST_REQUIRES_X86_SSE2;
1611     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
1612       VAddMicrokernelTester()
1613         .batch_size(batch_size)
1614         .qmax(128)
1615         .Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
1616     }
1617   }
1618 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619 
1620 
1621 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,batch_eq_8)1622   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
1623     TEST_REQUIRES_X86_SSE41;
1624     VAddMicrokernelTester()
1625       .batch_size(8)
1626       .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1627   }
1628 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,batch_div_8)1629   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
1630     TEST_REQUIRES_X86_SSE41;
1631     for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1632       VAddMicrokernelTester()
1633         .batch_size(batch_size)
1634         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1635     }
1636   }
1637 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,batch_lt_8)1638   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
1639     TEST_REQUIRES_X86_SSE41;
1640     for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1641       VAddMicrokernelTester()
1642         .batch_size(batch_size)
1643         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1644     }
1645   }
1646 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,batch_gt_8)1647   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
1648     TEST_REQUIRES_X86_SSE41;
1649     for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1650       VAddMicrokernelTester()
1651         .batch_size(batch_size)
1652         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1653     }
1654   }
1655 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,inplace_a)1656   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
1657     TEST_REQUIRES_X86_SSE41;
1658     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1659       VAddMicrokernelTester()
1660         .batch_size(batch_size)
1661         .inplace_a(true)
1662         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1663     }
1664   }
1665 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,inplace_b)1666   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
1667     TEST_REQUIRES_X86_SSE41;
1668     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1669       VAddMicrokernelTester()
1670         .batch_size(batch_size)
1671         .inplace_b(true)
1672         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1673     }
1674   }
1675 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,inplace_a_and_b)1676   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
1677     TEST_REQUIRES_X86_SSE41;
1678     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1679       VAddMicrokernelTester()
1680         .batch_size(batch_size)
1681         .inplace_a(true)
1682         .inplace_b(true)
1683         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1684     }
1685   }
1686 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,a_zero_point)1687   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
1688     TEST_REQUIRES_X86_SSE41;
1689     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1690       for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1691         VAddMicrokernelTester()
1692           .batch_size(batch_size)
1693           .a_zero_point(a_zero_point)
1694           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1695       }
1696     }
1697   }
1698 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,b_zero_point)1699   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
1700     TEST_REQUIRES_X86_SSE41;
1701     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1702       for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1703         VAddMicrokernelTester()
1704           .batch_size(batch_size)
1705           .b_zero_point(b_zero_point)
1706           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1707       }
1708     }
1709   }
1710 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,y_zero_point)1711   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
1712     TEST_REQUIRES_X86_SSE41;
1713     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1714       for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1715         VAddMicrokernelTester()
1716           .batch_size(batch_size)
1717           .y_zero_point(y_zero_point)
1718           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1719       }
1720     }
1721   }
1722 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,a_scale)1723   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
1724     TEST_REQUIRES_X86_SSE41;
1725     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1726       for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1727         VAddMicrokernelTester()
1728           .batch_size(batch_size)
1729           .a_scale(a_scale)
1730           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1731       }
1732     }
1733   }
1734 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,b_scale)1735   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
1736     TEST_REQUIRES_X86_SSE41;
1737     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1738       for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1739         VAddMicrokernelTester()
1740           .batch_size(batch_size)
1741           .b_scale(b_scale)
1742           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1743       }
1744     }
1745   }
1746 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,y_scale)1747   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
1748     TEST_REQUIRES_X86_SSE41;
1749     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1750       for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1751         VAddMicrokernelTester()
1752           .batch_size(batch_size)
1753           .y_scale(y_scale)
1754           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1755       }
1756     }
1757   }
1758 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,qmin)1759   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
1760     TEST_REQUIRES_X86_SSE41;
1761     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1762       VAddMicrokernelTester()
1763         .batch_size(batch_size)
1764         .qmin(128)
1765         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1766     }
1767   }
1768 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8,qmax)1769   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
1770     TEST_REQUIRES_X86_SSE41;
1771     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1772       VAddMicrokernelTester()
1773         .batch_size(batch_size)
1774         .qmax(128)
1775         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
1776     }
1777   }
1778 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779 
1780 
1781 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,batch_eq_16)1782   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
1783     TEST_REQUIRES_X86_SSE41;
1784     VAddMicrokernelTester()
1785       .batch_size(16)
1786       .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1787   }
1788 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,batch_div_16)1789   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
1790     TEST_REQUIRES_X86_SSE41;
1791     for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1792       VAddMicrokernelTester()
1793         .batch_size(batch_size)
1794         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1795     }
1796   }
1797 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,batch_lt_16)1798   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
1799     TEST_REQUIRES_X86_SSE41;
1800     for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1801       VAddMicrokernelTester()
1802         .batch_size(batch_size)
1803         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1804     }
1805   }
1806 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,batch_gt_16)1807   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
1808     TEST_REQUIRES_X86_SSE41;
1809     for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1810       VAddMicrokernelTester()
1811         .batch_size(batch_size)
1812         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1813     }
1814   }
1815 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,inplace_a)1816   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
1817     TEST_REQUIRES_X86_SSE41;
1818     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1819       VAddMicrokernelTester()
1820         .batch_size(batch_size)
1821         .inplace_a(true)
1822         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1823     }
1824   }
1825 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,inplace_b)1826   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
1827     TEST_REQUIRES_X86_SSE41;
1828     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1829       VAddMicrokernelTester()
1830         .batch_size(batch_size)
1831         .inplace_b(true)
1832         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1833     }
1834   }
1835 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,inplace_a_and_b)1836   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
1837     TEST_REQUIRES_X86_SSE41;
1838     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1839       VAddMicrokernelTester()
1840         .batch_size(batch_size)
1841         .inplace_a(true)
1842         .inplace_b(true)
1843         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1844     }
1845   }
1846 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,a_zero_point)1847   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
1848     TEST_REQUIRES_X86_SSE41;
1849     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1850       for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1851         VAddMicrokernelTester()
1852           .batch_size(batch_size)
1853           .a_zero_point(a_zero_point)
1854           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1855       }
1856     }
1857   }
1858 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,b_zero_point)1859   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
1860     TEST_REQUIRES_X86_SSE41;
1861     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1862       for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1863         VAddMicrokernelTester()
1864           .batch_size(batch_size)
1865           .b_zero_point(b_zero_point)
1866           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1867       }
1868     }
1869   }
1870 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,y_zero_point)1871   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
1872     TEST_REQUIRES_X86_SSE41;
1873     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1874       for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1875         VAddMicrokernelTester()
1876           .batch_size(batch_size)
1877           .y_zero_point(y_zero_point)
1878           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1879       }
1880     }
1881   }
1882 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,a_scale)1883   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
1884     TEST_REQUIRES_X86_SSE41;
1885     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1886       for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1887         VAddMicrokernelTester()
1888           .batch_size(batch_size)
1889           .a_scale(a_scale)
1890           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1891       }
1892     }
1893   }
1894 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,b_scale)1895   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
1896     TEST_REQUIRES_X86_SSE41;
1897     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1898       for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1899         VAddMicrokernelTester()
1900           .batch_size(batch_size)
1901           .b_scale(b_scale)
1902           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1903       }
1904     }
1905   }
1906 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,y_scale)1907   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
1908     TEST_REQUIRES_X86_SSE41;
1909     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1910       for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1911         VAddMicrokernelTester()
1912           .batch_size(batch_size)
1913           .y_scale(y_scale)
1914           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1915       }
1916     }
1917   }
1918 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,qmin)1919   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
1920     TEST_REQUIRES_X86_SSE41;
1921     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1922       VAddMicrokernelTester()
1923         .batch_size(batch_size)
1924         .qmin(128)
1925         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1926     }
1927   }
1928 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16,qmax)1929   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
1930     TEST_REQUIRES_X86_SSE41;
1931     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1932       VAddMicrokernelTester()
1933         .batch_size(batch_size)
1934         .qmax(128)
1935         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
1936     }
1937   }
1938 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939 
1940 
1941 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,batch_eq_24)1942   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
1943     TEST_REQUIRES_X86_SSE41;
1944     VAddMicrokernelTester()
1945       .batch_size(24)
1946       .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
1947   }
1948 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,batch_div_24)1949   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
1950     TEST_REQUIRES_X86_SSE41;
1951     for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
1952       VAddMicrokernelTester()
1953         .batch_size(batch_size)
1954         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
1955     }
1956   }
1957 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,batch_lt_24)1958   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
1959     TEST_REQUIRES_X86_SSE41;
1960     for (size_t batch_size = 1; batch_size < 24; batch_size++) {
1961       VAddMicrokernelTester()
1962         .batch_size(batch_size)
1963         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
1964     }
1965   }
1966 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,batch_gt_24)1967   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
1968     TEST_REQUIRES_X86_SSE41;
1969     for (size_t batch_size = 25; batch_size < 48; batch_size++) {
1970       VAddMicrokernelTester()
1971         .batch_size(batch_size)
1972         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
1973     }
1974   }
1975 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,inplace_a)1976   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a) {
1977     TEST_REQUIRES_X86_SSE41;
1978     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1979       VAddMicrokernelTester()
1980         .batch_size(batch_size)
1981         .inplace_a(true)
1982         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
1983     }
1984   }
1985 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,inplace_b)1986   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_b) {
1987     TEST_REQUIRES_X86_SSE41;
1988     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1989       VAddMicrokernelTester()
1990         .batch_size(batch_size)
1991         .inplace_b(true)
1992         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
1993     }
1994   }
1995 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,inplace_a_and_b)1996   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a_and_b) {
1997     TEST_REQUIRES_X86_SSE41;
1998     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1999       VAddMicrokernelTester()
2000         .batch_size(batch_size)
2001         .inplace_a(true)
2002         .inplace_b(true)
2003         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2004     }
2005   }
2006 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,a_zero_point)2007   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
2008     TEST_REQUIRES_X86_SSE41;
2009     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2010       for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2011         VAddMicrokernelTester()
2012           .batch_size(batch_size)
2013           .a_zero_point(a_zero_point)
2014           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2015       }
2016     }
2017   }
2018 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,b_zero_point)2019   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
2020     TEST_REQUIRES_X86_SSE41;
2021     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2022       for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2023         VAddMicrokernelTester()
2024           .batch_size(batch_size)
2025           .b_zero_point(b_zero_point)
2026           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2027       }
2028     }
2029   }
2030 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,y_zero_point)2031   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
2032     TEST_REQUIRES_X86_SSE41;
2033     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2034       for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2035         VAddMicrokernelTester()
2036           .batch_size(batch_size)
2037           .y_zero_point(y_zero_point)
2038           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2039       }
2040     }
2041   }
2042 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,a_scale)2043   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
2044     TEST_REQUIRES_X86_SSE41;
2045     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2046       for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2047         VAddMicrokernelTester()
2048           .batch_size(batch_size)
2049           .a_scale(a_scale)
2050           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2051       }
2052     }
2053   }
2054 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,b_scale)2055   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
2056     TEST_REQUIRES_X86_SSE41;
2057     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2058       for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2059         VAddMicrokernelTester()
2060           .batch_size(batch_size)
2061           .b_scale(b_scale)
2062           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2063       }
2064     }
2065   }
2066 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,y_scale)2067   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
2068     TEST_REQUIRES_X86_SSE41;
2069     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2070       for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2071         VAddMicrokernelTester()
2072           .batch_size(batch_size)
2073           .y_scale(y_scale)
2074           .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2075       }
2076     }
2077   }
2078 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,qmin)2079   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
2080     TEST_REQUIRES_X86_SSE41;
2081     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2082       VAddMicrokernelTester()
2083         .batch_size(batch_size)
2084         .qmin(128)
2085         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2086     }
2087   }
2088 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24,qmax)2089   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
2090     TEST_REQUIRES_X86_SSE41;
2091     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2092       VAddMicrokernelTester()
2093         .batch_size(batch_size)
2094         .qmax(128)
2095         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
2096     }
2097   }
2098 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2099 
2100 
2101 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32,batch_eq_32)2102   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
2103     TEST_REQUIRES_X86_SSE41;
2104     VAddMicrokernelTester()
2105       .batch_size(32)
2106       .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
2107   }
2108 
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32,batch_div_32)2109   TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
2110     TEST_REQUIRES_X86_SSE41;
2111     for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
2112       VAddMicrokernelTester()
2113         .batch_size(batch_size)
2114         .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
2115     }
2116   }
2117 
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch < 32; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2126 
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 33; batch < 64; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2135 
// Runs with inplace_a(true) for batch sizes 1, 32, 63, 94, 125 and 156
// (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2145 
// Runs with inplace_b(true) for batch sizes 1, 32, 63, 94, 125 and 156
// (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2155 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes
// 1, 32, 63, 94, 125 and 156 (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2166 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2178 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2190 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2202 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2214 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2226 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2238 
// Runs with qmin set to 128 for batch sizes 1, 32, 63, 94, 125 and 156
// (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2248 
// Runs with qmax set to 128 for batch sizes 1, 32, 63, 94, 125 and 156
// (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2258 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259 
2260 
2261 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x8 AVX kernel with the batch size set to exactly 8.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const size_t batch = 8;
  VAddMicrokernelTester().batch_size(batch).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2268 
// Sweeps batch sizes 16, 24, ..., 72: the multiples of 8 from 16 up to,
// but not including, 80.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 16; batch < 80; batch += 8) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2277 
// Sweeps every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch < 8; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2286 
// Sweeps every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 9; batch < 16; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2295 
// Runs with inplace_a(true) for batch sizes 1, 8, 15, 22, 29 and 36
// (stride 7, capped at 40).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2305 
// Runs with inplace_b(true) for batch sizes 1, 8, 15, 22, 29 and 36
// (stride 7, capped at 40).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VAddMicrokernelTester().batch_size(batch).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2315 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes
// 1, 8, 15, 22, 29 and 36 (stride 7, capped at 40).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2326 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2338 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2350 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2362 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2374 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2386 
// For batch sizes 1, 8, 15, 22, 29 and 36, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2398 
// Runs with qmin set to 128 for batch sizes 1, 8, 15, 22, 29 and 36
// (stride 7, capped at 40).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VAddMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2408 
// Runs with qmax set to 128 for batch sizes 1, 8, 15, 22, 29 and 36
// (stride 7, capped at 40).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VAddMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2418 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2419 
2420 
2421 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x16 AVX kernel with the batch size set to exactly 16.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  const size_t batch = 16;
  VAddMicrokernelTester().batch_size(batch).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2428 
// Sweeps batch sizes 32, 48, ..., 144: the multiples of 16 from 32 up to,
// but not including, 160.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 32; batch < 160; batch += 16) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2437 
// Sweeps every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch < 16; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2446 
// Sweeps every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 17; batch < 32; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2455 
// Runs with inplace_a(true) for batch sizes 1, 16, 31, 46, 61 and 76
// (stride 15, capped at 80).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2465 
// Runs with inplace_b(true) for batch sizes 1, 16, 31, 46, 61 and 76
// (stride 15, capped at 80).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VAddMicrokernelTester().batch_size(batch).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2475 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes
// 1, 16, 31, 46, 61 and 76 (stride 15, capped at 80).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2486 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2498 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2510 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2522 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2534 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2546 
// For batch sizes 1, 16, 31, 46, 61 and 76, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2558 
// Runs with qmin set to 128 for batch sizes 1, 16, 31, 46, 61 and 76
// (stride 15, capped at 80).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VAddMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2568 
// Runs with qmax set to 128 for batch sizes 1, 16, 31, 46, 61 and 76
// (stride 15, capped at 80).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VAddMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2578 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2579 
2580 
2581 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x24 AVX kernel with the batch size set to exactly 24.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  const size_t batch = 24;
  VAddMicrokernelTester().batch_size(batch).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2588 
// Sweeps batch sizes 48, 72, ..., 216: the multiples of 24 from 48 up to,
// but not including, 240.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 48; batch < 240; batch += 24) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2597 
// Sweeps every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch < 24; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2606 
// Sweeps every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 25; batch < 48; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2615 
// Runs with inplace_a(true) for batch sizes 1, 24, 47, 70, 93 and 116
// (stride 23, capped at 120).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2625 
// Runs with inplace_b(true) for batch sizes 1, 24, 47, 70, 93 and 116
// (stride 23, capped at 120).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    VAddMicrokernelTester().batch_size(batch).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2635 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes
// 1, 24, 47, 70, 93 and 116 (stride 23, capped at 120).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2646 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2658 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps b_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2670 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps y_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2682 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps a_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2694 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps b_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2706 
// For batch sizes 1, 24, 47, 70, 93 and 116, sweeps y_scale geometrically:
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2718 
// Runs with qmin set to 128 for batch sizes 1, 24, 47, 70, 93 and 116
// (stride 23, capped at 120).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    VAddMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2728 
// Runs with qmax set to 128 for batch sizes 1, 24, 47, 70, 93 and 116
// (stride 23, capped at 120).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 120; batch += 23) {
    VAddMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2738 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2739 
2740 
2741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x32 AVX kernel with the batch size set to exactly 32.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t batch = 32;
  VAddMicrokernelTester().batch_size(batch).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2748 
// Sweeps batch sizes 64, 96, ..., 288: the multiples of 32 from 64 up to,
// but not including, 320.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 64; batch < 320; batch += 32) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2757 
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch < 32; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2766 
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 33; batch < 64; ++batch) {
    VAddMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2775 
// Runs with inplace_a(true) for batch sizes 1, 32, 63, 94, 125 and 156
// (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2785 
// Runs with inplace_b(true) for batch sizes 1, 32, 63, 94, 125 and 156
// (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2795 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes
// 1, 32, 63, 94, 125 and 156 (stride 31, capped at 160).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    VAddMicrokernelTester().batch_size(batch).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2806 
// For batch sizes 1, 32, 63, 94, 125 and 156, sweeps a_zero_point over
// -128, -77, -26, 25, 76 and 127 (stride 51).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 160; batch += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2818 
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2830 
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2842 
// Sweeps a_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2854 
// Sweeps b_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2866 
// Sweeps y_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2878 
// Runs with qmin(128) for batch sizes 1, 32, ..., 156 (step 31, at most 160).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2888 
// Runs with qmax(128) for batch sizes 1, 32, ..., 156 (step 31, at most 160).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2898 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2899 
2900 
2901 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size(8).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2908 
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2917 
// Sweeps every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2926 
// Sweeps every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2935 
// Runs with inplace_a(true) for batch sizes 1, 8, ..., 36 (step 7, at most 40).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2945 
// Runs with inplace_b(true) for batch sizes 1, 8, ..., 36 (step 7, at most 40).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2955 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2966 
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2978 
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2990 
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3002 
// Sweeps a_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3014 
// Sweeps b_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3026 
// Sweeps y_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3038 
// Runs with qmin(128) for batch sizes 1, 8, ..., 36 (step 7, at most 40).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3048 
// Runs with qmax(128) for batch sizes 1, 8, ..., 36 (step 7, at most 40).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3058 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3059 
3060 
3061 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size(16).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3068 
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3077 
// Sweeps every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3086 
// Sweeps every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3095 
// Runs with inplace_a(true) for batch sizes 1, 16, ..., 76 (step 15, at most 80).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3105 
// Runs with inplace_b(true) for batch sizes 1, 16, ..., 76 (step 15, at most 80).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3115 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3126 
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3138 
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3150 
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3162 
// Sweeps a_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3174 
// Sweeps b_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3186 
// Sweeps y_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3198 
// Runs with qmin(128) for batch sizes 1, 16, ..., 76 (step 15, at most 80).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3208 
// Runs with qmax(128) for batch sizes 1, 16, ..., 76 (step 15, at most 80).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3218 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3219 
3220 
3221 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size(24).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester()
    .batch_size(24)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3228 
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24 below 240).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3237 
// Sweeps every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 24; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3246 
// Sweeps every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 25; batch_size < 48; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3255 
// Runs with inplace_a(true) for batch sizes 1, 24, ..., 116 (step 23, at most 120).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3265 
// Runs with inplace_b(true) for batch sizes 1, 24, ..., 116 (step 23, at most 120).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3275 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3286 
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3298 
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3310 
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3322 
// Sweeps a_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3334 
// Sweeps b_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3346 
// Sweeps y_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3358 
// Runs with qmin(128) for batch sizes 1, 24, ..., 116 (step 23, at most 120).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3368 
// Runs with qmax(128) for batch sizes 1, 24, ..., 116 (step 23, at most 120).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3378 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3379 
3380 
3381 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size(32).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester()
    .batch_size(32)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3388 
// Sweeps batch sizes 64, 96, ..., 288 (multiples of 32 below 320).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3397 
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3406 
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 33; batch_size < 64; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3415 
// Runs with inplace_a(true) for batch sizes 1, 32, ..., 156 (step 31, at most 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3425 
// Runs with inplace_b(true) for batch sizes 1, 32, ..., 156 (step 31, at most 160).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3435 
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3446 
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3458 
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3470 
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3482 
// Sweeps a_scale geometrically (0.1f, multiplied by 3.14f while <= 10.0f) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3494 
// For batch_size 1, 32, ..., 156 (step 31), sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3506 
// For batch_size 1, 32, ..., 156 (step 31), sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3518 
// Runs the sse41_mul32_ld32_x32 kernel with qmin set to 128 for
// batch_size 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3528 
// Runs the sse41_mul32_ld32_x32 kernel with qmax set to 128 for
// batch_size 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3538 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3539 
3540 
3541 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the avx_mul32_ld32_x8 kernel with batch_size fixed at 8.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester tester{};
  tester.batch_size(8);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3548 
// Runs the avx_mul32_ld32_x8 kernel for batch_size 16, 24, ..., 72 (step 8).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3557 
// Runs the avx_mul32_ld32_x8 kernel for every batch_size from 1 through 7.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3566 
// Runs the avx_mul32_ld32_x8 kernel for every batch_size from 9 through 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3575 
// Runs the avx_mul32_ld32_x8 kernel with inplace_a enabled for
// batch_size 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3585 
// Runs the avx_mul32_ld32_x8 kernel with inplace_b enabled for
// batch_size 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3595 
// Runs the avx_mul32_ld32_x8 kernel with both inplace_a and inplace_b enabled
// for batch_size 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3606 
// For batch_size 1, 8, ..., 36 (step 7), sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x8 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3618 
// For batch_size 1, 8, ..., 36 (step 7), sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x8 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3630 
// For batch_size 1, 8, ..., 36 (step 7), sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x8 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3642 
// For batch_size 1, 8, ..., 36 (step 7), sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3654 
// For batch_size 1, 8, ..., 36 (step 7), sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3666 
// For batch_size 1, 8, ..., 36 (step 7), sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3678 
// Runs the avx_mul32_ld32_x8 kernel with qmin set to 128 for
// batch_size 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3688 
// Runs the avx_mul32_ld32_x8 kernel with qmax set to 128 for
// batch_size 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3698 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3699 
3700 
3701 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the avx_mul32_ld32_x16 kernel with batch_size fixed at 16.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester tester{};
  tester.batch_size(16);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3708 
// Runs the avx_mul32_ld32_x16 kernel for batch_size 32, 48, ..., 144 (step 16).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3717 
// Runs the avx_mul32_ld32_x16 kernel for every batch_size from 1 through 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3726 
// Runs the avx_mul32_ld32_x16 kernel for every batch_size from 17 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3735 
// Runs the avx_mul32_ld32_x16 kernel with inplace_a enabled for
// batch_size 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3745 
// Runs the avx_mul32_ld32_x16 kernel with inplace_b enabled for
// batch_size 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3755 
// Runs the avx_mul32_ld32_x16 kernel with both inplace_a and inplace_b enabled
// for batch_size 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3766 
// For batch_size 1, 16, ..., 76 (step 15), sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x16 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3778 
// For batch_size 1, 16, ..., 76 (step 15), sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x16 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3790 
// For batch_size 1, 16, ..., 76 (step 15), sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x16 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3802 
// For batch_size 1, 16, ..., 76 (step 15), sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3814 
// For batch_size 1, 16, ..., 76 (step 15), sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3826 
// For batch_size 1, 16, ..., 76 (step 15), sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3838 
// Runs the avx_mul32_ld32_x16 kernel with qmin set to 128 for
// batch_size 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3848 
// Runs the avx_mul32_ld32_x16 kernel with qmax set to 128 for
// batch_size 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3858 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3859 
3860 
3861 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the avx_mul32_ld32_x24 kernel with batch_size fixed at 24.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester tester{};
  tester.batch_size(24);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3868 
// Runs the avx_mul32_ld32_x24 kernel for batch_size 48, 72, ..., 216 (step 24).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3877 
// Runs the avx_mul32_ld32_x24 kernel for every batch_size from 1 through 23.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3886 
// Runs the avx_mul32_ld32_x24 kernel for every batch_size from 25 through 47.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3895 
// Runs the avx_mul32_ld32_x24 kernel with inplace_a enabled for
// batch_size 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3905 
// Runs the avx_mul32_ld32_x24 kernel with inplace_b enabled for
// batch_size 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3915 
// Runs the avx_mul32_ld32_x24 kernel with both inplace_a and inplace_b enabled
// for batch_size 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3926 
// For batch_size 1, 24, ..., 116 (step 23), sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x24 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3938 
// For batch_size 1, 24, ..., 116 (step 23), sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x24 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3950 
// For batch_size 1, 24, ..., 116 (step 23), sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x24 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3962 
// For batch_size 1, 24, ..., 116 (step 23), sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3974 
// For batch_size 1, 24, ..., 116 (step 23), sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3986 
// For batch_size 1, 24, ..., 116 (step 23), sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3998 
// Runs the avx_mul32_ld32_x24 kernel with qmin set to 128 for
// batch_size 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4008 
// Runs the avx_mul32_ld32_x24 kernel with qmax set to 128 for
// batch_size 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4018 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4019 
4020 
4021 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the avx_mul32_ld32_x32 kernel with batch_size fixed at 32.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester tester{};
  tester.batch_size(32);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4028 
// Runs the avx_mul32_ld32_x32 kernel for batch_size 64, 96, ..., 288 (step 32).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4037 
// Runs the avx_mul32_ld32_x32 kernel for every batch_size from 1 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4046 
// Runs the avx_mul32_ld32_x32 kernel for every batch_size from 33 through 63.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4055 
// Runs the avx_mul32_ld32_x32 kernel with inplace_a enabled for
// batch_size 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4065 
// Runs the avx_mul32_ld32_x32 kernel with inplace_b enabled for
// batch_size 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4075 
// Runs the avx_mul32_ld32_x32 kernel with both inplace_a and inplace_b enabled
// for batch_size 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4086 
// For batch_size 1, 32, ..., 156 (step 31), sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x32 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4098 
// For batch_size 1, 32, ..., 156 (step 31), sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x32 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4110 
// For batch_size 1, 32, ..., 156 (step 31), sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 on the avx_mul32_ld32_x32 kernel.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4122 
// For batch_size 1, 32, ..., 156 (step 31), sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4134 
// For batch_size 1, 32, ..., 156 (step 31), sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4146 
// For batch_size 1, 32, ..., 156 (step 31), sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4158 
// Runs the avx_mul32_ld32_x32 kernel with qmin set to 128 for
// batch_size 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4168 
// Runs the avx_mul32_ld32_x32 kernel with qmax set to 128 for
// batch_size 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4178 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4179 
4180 
4181 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 8.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(8);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4188 
// Batch sizes that are multiples of 8: 16, 24, ..., 72.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 16; n < 80; n += 8) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4197 
// Every batch size below 8: 1 through 7.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n < 8; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4206 
// Every batch size strictly between 8 and 16: 9 through 15.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 9; n < 16; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4215 
// Enables the tester's inplace_a flag for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4225 
// Enables the tester's inplace_b flag for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4235 
// Enables both inplace_a and inplace_b for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4246 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4258 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4270 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4282 
// Sweeps the a_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4294 
// Sweeps the b_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4306 
// Sweeps the y_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4318 
// Runs the kernel with the tester's qmin set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4328 
// Runs the kernel with the tester's qmax set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4338 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4339 
4340 
4341 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 16.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_XOP;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(16);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4348 
// Batch sizes that are multiples of 16: 32, 48, ..., 144.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 32; n < 160; n += 16) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4357 
// Every batch size below 16: 1 through 15.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n < 16; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4366 
// Every batch size strictly between 16 and 32: 17 through 31.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 17; n < 32; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4375 
// Enables the tester's inplace_a flag for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4385 
// Enables the tester's inplace_b flag for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4395 
// Enables both inplace_a and inplace_b for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4406 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4418 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4430 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4442 
// Sweeps the a_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4454 
// Sweeps the b_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4466 
// Sweeps the y_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4478 
// Runs the kernel with the tester's qmin set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4488 
// Runs the kernel with the tester's qmax set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4498 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4499 
4500 
4501 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 24.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_XOP;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(24);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4508 
// Batch sizes that are multiples of 24: 48, 72, ..., 216.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 48; n < 240; n += 24) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4517 
// Every batch size below 24: 1 through 23.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n < 24; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4526 
// Every batch size strictly between 24 and 48: 25 through 47.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 25; n < 48; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4535 
// Enables the tester's inplace_a flag for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4545 
// Enables the tester's inplace_b flag for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4555 
// Enables both inplace_a and inplace_b for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4566 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4578 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4590 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4602 
// Sweeps the a_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4614 
// Sweeps the b_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4626 
// Sweeps the y_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4638 
// Runs the kernel with the tester's qmin set to 128 for batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4648 
// Runs the kernel with the tester's qmax set to 128 for batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4658 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4659 
4660 
4661 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 32.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_XOP;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(32);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4668 
// Batch sizes that are multiples of 32: 64, 96, ..., 288.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 64; n < 320; n += 32) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4677 
// Every batch size below 32: 1 through 31.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n < 32; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4686 
// Every batch size strictly between 32 and 64: 33 through 63.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 33; n < 64; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4695 
// Enables the tester's inplace_a flag for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4705 
// Enables the tester's inplace_b flag for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4715 
// Enables both inplace_a and inplace_b for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4726 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4738 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4750 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4762 
// Sweeps the a_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4774 
// Sweeps the b_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4786 
// Sweeps the y_scale setting (0.1, multiplied by 3.14 while <= 10) for batch
// sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4798 
// Runs the kernel with the tester's qmin set to 128 for batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4808 
// Runs the kernel with the tester's qmax set to 128 for batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4818 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4819 
4820 
4821 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 8.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(8);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
}
4828 
// Batch sizes that are multiples of 8: 16, 24, ..., 72.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4837 
// Every batch size below 8: 1 through 7.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4846 
// Every batch size strictly between 8 and 16: 9 through 15.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; n++) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4855 
// Enables the tester's inplace_a flag for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4865 
// Enables the tester's inplace_b flag for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4875 
// Runs with both inplace_a and inplace_b enabled for batch sizes 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4886 
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4898 
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4910 
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4922 
// Crosses batch sizes 1, 8, ..., 36 with a_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4934 
// Crosses batch sizes 1, 8, ..., 36 with b_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4946 
// Crosses batch sizes 1, 8, ..., 36 with y_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4958 
// Runs with the tester's qmin set to 128 for batch sizes 1, 8, ..., 36 (step 7).
// NOTE(review): qmin is presumably the lower output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmin(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4968 
// Runs with the tester's qmax set to 128 for batch sizes 1, 8, ..., 36 (step 7).
// NOTE(review): qmax is presumably the upper output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmax(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4978 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4979 
4980 
4981 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x16 AVX2 kernel once with a batch of exactly 16.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VAddMicrokernelTester tester{};
  tester.batch_size(16)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
}
4988 
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16) through the x16 AVX2 kernel.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4997 
// Exercises every batch size from 1 through 15 (all sizes below 16).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5006 
// Exercises every batch size from 17 through 31 (above 16, below 32).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5015 
// Runs with the tester's inplace_a flag enabled for batch sizes 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5025 
// Runs with the tester's inplace_b flag enabled for batch sizes 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5035 
// Runs with both inplace_a and inplace_b enabled for batch sizes 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5046 
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5058 
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5070 
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5082 
// Crosses batch sizes 1, 16, ..., 76 with a_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5094 
// Crosses batch sizes 1, 16, ..., 76 with b_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5106 
// Crosses batch sizes 1, 16, ..., 76 with y_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5118 
// Runs with the tester's qmin set to 128 for batch sizes 1, 16, ..., 76 (step 15).
// NOTE(review): qmin is presumably the lower output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmin(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5128 
// Runs with the tester's qmax set to 128 for batch sizes 1, 16, ..., 76 (step 15).
// NOTE(review): qmax is presumably the upper output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmax(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5138 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5139 
5140 
5141 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x24 AVX2 kernel once with a batch of exactly 24.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VAddMicrokernelTester tester{};
  tester.batch_size(24)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
}
5148 
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24) through the x24 AVX2 kernel.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5157 
// Exercises every batch size from 1 through 23 (all sizes below 24).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5166 
// Exercises every batch size from 25 through 47 (above 24, below 48).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5175 
// Runs with the tester's inplace_a flag enabled for batch sizes 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5185 
// Runs with the tester's inplace_b flag enabled for batch sizes 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5195 
// Runs with both inplace_a and inplace_b enabled for batch sizes 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5206 
// Crosses batch sizes 1, 24, ..., 116 with a_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5218 
// Crosses batch sizes 1, 24, ..., 116 with b_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5230 
// Crosses batch sizes 1, 24, ..., 116 with y_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5242 
// Crosses batch sizes 1, 24, ..., 116 with a_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5254 
// Crosses batch sizes 1, 24, ..., 116 with b_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5266 
// Crosses batch sizes 1, 24, ..., 116 with y_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5278 
// Runs with the tester's qmin set to 128 for batch sizes 1, 24, ..., 116 (step 23).
// NOTE(review): qmin is presumably the lower output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmin(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5288 
// Runs with the tester's qmax set to 128 for batch sizes 1, 24, ..., 116 (step 23).
// NOTE(review): qmax is presumably the upper output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmax(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5298 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5299 
5300 
5301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x32 AVX2 kernel once with a batch of exactly 32.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  VAddMicrokernelTester tester{};
  tester.batch_size(32)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
}
5308 
// Sweeps batch sizes 64, 96, ..., 288 (multiples of 32) through the x32 AVX2 kernel.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5317 
// Exercises every batch size from 1 through 31 (all sizes below 32).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5326 
// Exercises every batch size from 33 through 63 (above 32, below 64).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5335 
// Runs with the tester's inplace_a flag enabled for batch sizes 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5345 
// Runs with the tester's inplace_b flag enabled for batch sizes 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5355 
// Runs with both inplace_a and inplace_b enabled for batch sizes 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5366 
// Crosses batch sizes 1, 32, ..., 156 with a_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5378 
// Crosses batch sizes 1, 32, ..., 156 with b_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5390 
// Crosses batch sizes 1, 32, ..., 156 with y_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5402 
// Crosses batch sizes 1, 32, ..., 156 with a_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5414 
// Crosses batch sizes 1, 32, ..., 156 with b_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5426 
// Crosses batch sizes 1, 32, ..., 156 with y_scale starting at 0.1 and growing by a
// factor of 3.14 per step while it stays at or below 10.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_scale(scale)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5438 
// Runs with the tester's qmin set to 128 for batch sizes 1, 32, ..., 156 (step 31).
// NOTE(review): qmin is presumably the lower output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmin(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5448 
// Runs with the tester's qmax set to 128 for batch sizes 1, 32, ..., 156 (step 31).
// NOTE(review): qmax is presumably the upper output clamp; confirm in VAddMicrokernelTester.
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .qmax(128)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5458 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5459 
5460 
5461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x16 AVX512SKX kernel once with a batch of exactly 16.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddMicrokernelTester tester{};
  tester.batch_size(16)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
}
5468 
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16) through the x16 AVX512SKX kernel.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5477 
// Exercises every batch size from 1 through 15 (all sizes below 16).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5486 
// Exercises every batch size from 17 through 31 (above 16, below 32).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5495 
// Runs with the tester's inplace_a flag enabled for batch sizes 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5505 
// Runs with the tester's inplace_b flag enabled for batch sizes 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5515 
// Runs with both inplace_a and inplace_b enabled for batch sizes 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n)
          .inplace_a(true)
          .inplace_b(true)
          .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5526 
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .a_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5538 
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .b_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5550 
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point values -128..127 in steps of 51.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n)
            .y_zero_point(zp)
            .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5562 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16 over a
// geometric sweep of a_scale (starts at 0.1f, multiplied by 3.14f while it
// stays <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5574 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16 over a
// geometric sweep of b_scale (starts at 0.1f, multiplied by 3.14f while it
// stays <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5586 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16 over a
// geometric sweep of y_scale (starts at 0.1f, multiplied by 3.14f while it
// stays <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5598 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16 with the
// tester's qmin set to 128, at batch sizes 1, 16, 31, 46, 61, 76.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5608 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16 with the
// tester's qmax set to 128, at batch sizes 1, 16, 31, 46, 61, 76.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5618 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5619 
5620 
5621 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 once with
// batch_size 32. Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddMicrokernelTester()
    .batch_size(32)
    .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
}
5628 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 at batch
// sizes 64, 96, ..., 288 (multiples of 32 from 64 up to, but excluding, 320).
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5637 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 at every
// batch size from 1 through 31. Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5646 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 at every
// batch size from 33 through 63. Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 33; batch_size < 64; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5655 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 with
// inplace_a enabled, at batch sizes 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5665 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 with
// inplace_b enabled, at batch sizes 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5675 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 with both
// inplace_a and inplace_b enabled, at batch sizes 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5686 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 over
// a_zero_point values -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 32, 63, 94, 125, 156. Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5698 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 over
// b_zero_point values -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 32, 63, 94, 125, 156. Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5710 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 over
// y_zero_point values -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 32, 63, 94, 125, 156. Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5722 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 over a
// geometric sweep of a_scale (starts at 0.1f, multiplied by 3.14f while it
// stays <= 10.0f) for each batch size in 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5734 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 over a
// geometric sweep of b_scale (starts at 0.1f, multiplied by 3.14f while it
// stays <= 10.0f) for each batch size in 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5746 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 over a
// geometric sweep of y_scale (starts at 0.1f, multiplied by 3.14f while it
// stays <= 10.0f) for each batch size in 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5758 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 with the
// tester's qmin set to 128, at batch sizes 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5768 
// Exercises xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32 with the
// tester's qmax set to 128, at batch sizes 1, 32, 63, 94, 125, 156.
// Guarded by TEST_REQUIRES_X86_AVX512SKX.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }
}
5778 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5779 
5780 
5781 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 once with batch_size 8.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_eq_8) {
  VAddMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
}
5787 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 at batch sizes
// 16, 24, ..., 72 (multiples of 8 from 16 up to, but excluding, 80).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_div_8) {
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5795 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 at every batch size
// from 1 through 7.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_lt_8) {
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5803 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 at every batch size
// from 9 through 15.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_gt_8) {
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5811 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 with inplace_a enabled,
// at batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5820 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 with inplace_b enabled,
// at batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_b) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5829 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 with both inplace_a and
// inplace_b enabled, at batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a_and_b) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5839 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 over a_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_zero_point) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5850 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 over b_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_zero_point) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5861 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 over y_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_zero_point) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5872 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 over a geometric sweep of
// a_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_scale) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5883 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 over a geometric sweep of
// b_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_scale) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5894 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 over a geometric sweep of
// y_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_scale) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5905 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 with the tester's qmin
// set to 128, at batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmin) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5914 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8 with the tester's qmax
// set to 128, at batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmax) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5923 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5924 
5925 
5926 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 once with batch_size 16.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_eq_16) {
  VAddMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
}
5932 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 at batch sizes
// 32, 48, ..., 144 (multiples of 16 from 32 up to, but excluding, 160).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_div_16) {
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5940 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 at every batch size
// from 1 through 15.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_lt_16) {
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5948 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 at every batch size
// from 17 through 31.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_gt_16) {
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5956 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 with inplace_a enabled,
// at batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5965 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 with inplace_b enabled,
// at batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_b) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5974 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 with both inplace_a and
// inplace_b enabled, at batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a_and_b) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5984 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 over a_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_zero_point) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5995 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 over b_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_zero_point) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6006 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 over y_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_zero_point) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6017 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 over a geometric sweep
// of a_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_scale) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6028 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 over a geometric sweep
// of b_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_scale) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6039 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 over a geometric sweep
// of y_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_scale) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6050 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 with the tester's qmin
// set to 128, at batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmin) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6059 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16 with the tester's qmax
// set to 128, at batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmax) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6068 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6069 
6070 
6071 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 once with batch_size 24.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_eq_24) {
  VAddMicrokernelTester()
    .batch_size(24)
    .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
}
6077 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 at batch sizes
// 48, 72, ..., 216 (multiples of 24 from 48 up to, but excluding, 240).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_div_24) {
  for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6085 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 at every batch size
// from 1 through 23.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_lt_24) {
  for (size_t batch_size = 1; batch_size < 24; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6093 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 at every batch size
// from 25 through 47.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_gt_24) {
  for (size_t batch_size = 25; batch_size < 48; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6101 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 with inplace_a enabled,
// at batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6110 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 with inplace_b enabled,
// at batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_b) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6119 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 with both inplace_a and
// inplace_b enabled, at batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a_and_b) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6129 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 over a_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_zero_point) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6140 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 over b_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_zero_point) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6151 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 over y_zero_point values
// -128, -77, -26, 25, 76, 127 for each batch size in 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_zero_point) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6162 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 over a geometric sweep
// of a_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_scale) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6173 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 over a geometric sweep
// of b_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_scale) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6184 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 over a geometric sweep
// of y_scale (starts at 0.1f, multiplied by 3.14f while it stays <= 10.0f) for
// each batch size in 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_scale) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VAddMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6195 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 with the tester's qmin
// set to 128, at batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmin) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6204 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24 with the tester's qmax
// set to 128, at batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmax) {
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6213 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6214 
6215 
6216 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32 once with batch_size 32.
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_eq_32) {
  VAddMicrokernelTester()
    .batch_size(32)
    .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
}
6222 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32 at batch sizes
// 64, 96, ..., 288 (multiples of 32 from 64 up to, but excluding, 320).
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_div_32) {
  for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6230 
// Exercises xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32 at every batch size
// from 1 through 31.
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_lt_32) {
  for (size_t batch_size = 1; batch_size < 32; batch_size++) {
    VAddMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6238 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_gt_32) {
  // Every batch size from 33 through 63.
  for (size_t n = 33; n < 64; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6246 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a) {
  // Batch sizes 1, 32, 63, 94, 125, 156 with inplace_a enabled on the tester.
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6255 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_b) {
  // Batch sizes 1, 32, 63, 94, 125, 156 with inplace_b enabled on the tester.
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6264 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a_and_b) {
  // Batch sizes 1, 32, 63, 94, 125, 156 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6274 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_zero_point) {
  // Outer sweep: batch sizes 1, 32, ..., 156. Inner sweep: a_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6285 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_zero_point) {
  // Outer sweep: batch sizes 1, 32, ..., 156. Inner sweep: b_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6296 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_zero_point) {
  // Outer sweep: batch sizes 1, 32, ..., 156. Inner sweep: y_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6307 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_scale) {
  // For each batch size 1, 32, ..., 156: grow a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6318 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_scale) {
  // For each batch size 1, 32, ..., 156: grow b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6329 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_scale) {
  // For each batch size 1, 32, ..., 156: grow y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6340 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmin) {
  // Batch sizes 1, 32, 63, 94, 125, 156, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6349 
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmax) {
  // Batch sizes 1, 32, 63, 94, 125, 156, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6358 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6359 
6360 
TEST(QS8_VADD_MINMAX__SCALAR_X1, batch_eq_1) {
  // Single run with a batch of exactly one element.
  auto tester = VAddMicrokernelTester().batch_size(1);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
}
6366 
TEST(QS8_VADD_MINMAX__SCALAR_X1, batch_gt_1) {
  // Every batch size from 2 through 9.
  for (size_t n = 2; n < 10; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6374 
TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_a) {
  // Batch sizes 1 through 5 with inplace_a enabled on the tester.
  for (size_t n = 1; n <= 5; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6383 
TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_b) {
  // Batch sizes 1 through 5 with inplace_b enabled on the tester.
  for (size_t n = 1; n <= 5; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6392 
TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_a_and_b) {
  // Batch sizes 1 through 5 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 5; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6402 
TEST(QS8_VADD_MINMAX__SCALAR_X1, a_zero_point) {
  // Outer sweep: batch sizes 1 through 5. Inner sweep: a_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 5; n += 1) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6413 
TEST(QS8_VADD_MINMAX__SCALAR_X1, b_zero_point) {
  // Outer sweep: batch sizes 1 through 5. Inner sweep: b_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 5; n += 1) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6424 
TEST(QS8_VADD_MINMAX__SCALAR_X1, y_zero_point) {
  // Outer sweep: batch sizes 1 through 5. Inner sweep: y_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 5; n += 1) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6435 
TEST(QS8_VADD_MINMAX__SCALAR_X1, a_scale) {
  // For each batch size 1 through 5: grow a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 5; n += 1) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6446 
TEST(QS8_VADD_MINMAX__SCALAR_X1, b_scale) {
  // For each batch size 1 through 5: grow b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 5; n += 1) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6457 
TEST(QS8_VADD_MINMAX__SCALAR_X1, y_scale) {
  // For each batch size 1 through 5: grow y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 5; n += 1) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6468 
TEST(QS8_VADD_MINMAX__SCALAR_X1, qmin) {
  // Batch sizes 1 through 5, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 5; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6477 
TEST(QS8_VADD_MINMAX__SCALAR_X1, qmax) {
  // Batch sizes 1 through 5, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 5; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6486 
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_eq_2) {
  // Single run with a batch of exactly two elements.
  auto tester = VAddMicrokernelTester().batch_size(2);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
}
6492 
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_div_2) {
  // Even batch sizes from 4 up to (but excluding) 20.
  for (size_t n = 4; n < 20; n += 2) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6500 
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_lt_2) {
  // The range [1, 2) contains only batch size 1, so the loop body runs once.
  for (size_t n = 1; n < 2; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6508 
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_gt_2) {
  // The range [3, 4) contains only batch size 3, so the loop body runs once.
  for (size_t n = 3; n < 4; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6516 
TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_a) {
  // Batch sizes 1 through 10 with inplace_a enabled on the tester.
  for (size_t n = 1; n <= 10; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6525 
TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_b) {
  // Batch sizes 1 through 10 with inplace_b enabled on the tester.
  for (size_t n = 1; n <= 10; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6534 
TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_a_and_b) {
  // Batch sizes 1 through 10 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 10; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6544 
TEST(QS8_VADD_MINMAX__SCALAR_X2, a_zero_point) {
  // Outer sweep: batch sizes 1 through 10. Inner sweep: a_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 10; n += 1) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6555 
TEST(QS8_VADD_MINMAX__SCALAR_X2, b_zero_point) {
  // Outer sweep: batch sizes 1 through 10. Inner sweep: b_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 10; n += 1) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6566 
TEST(QS8_VADD_MINMAX__SCALAR_X2, y_zero_point) {
  // Outer sweep: batch sizes 1 through 10. Inner sweep: y_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 10; n += 1) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6577 
TEST(QS8_VADD_MINMAX__SCALAR_X2, a_scale) {
  // For each batch size 1 through 10: grow a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 10; n += 1) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6588 
TEST(QS8_VADD_MINMAX__SCALAR_X2, b_scale) {
  // For each batch size 1 through 10: grow b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 10; n += 1) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6599 
TEST(QS8_VADD_MINMAX__SCALAR_X2, y_scale) {
  // For each batch size 1 through 10: grow y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 10; n += 1) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6610 
TEST(QS8_VADD_MINMAX__SCALAR_X2, qmin) {
  // Batch sizes 1 through 10, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 10; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6619 
TEST(QS8_VADD_MINMAX__SCALAR_X2, qmax) {
  // Batch sizes 1 through 10, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 10; n += 1) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6628 
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_eq_4) {
  // Single run with a batch of exactly four elements.
  auto tester = VAddMicrokernelTester().batch_size(4);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
}
6634 
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_div_4) {
  // Multiples of 4 from 8 up to (but excluding) 40.
  for (size_t n = 8; n < 40; n += 4) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6642 
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_lt_4) {
  // Batch sizes 1, 2 and 3.
  for (size_t n = 1; n < 4; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6650 
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_gt_4) {
  // Batch sizes 5, 6 and 7.
  for (size_t n = 5; n < 8; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6658 
TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_a) {
  // Batch sizes 1, 4, 7, 10, 13, 16, 19 with inplace_a enabled on the tester.
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6667 
TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_b) {
  // Batch sizes 1, 4, 7, 10, 13, 16, 19 with inplace_b enabled on the tester.
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6676 
TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_a_and_b) {
  // Batch sizes 1, 4, 7, 10, 13, 16, 19 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6686 
TEST(QS8_VADD_MINMAX__SCALAR_X4, a_zero_point) {
  // Outer sweep: batch sizes 1, 4, ..., 19. Inner sweep: a_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6697 
TEST(QS8_VADD_MINMAX__SCALAR_X4, b_zero_point) {
  // Outer sweep: batch sizes 1, 4, ..., 19. Inner sweep: b_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6708 
TEST(QS8_VADD_MINMAX__SCALAR_X4, y_zero_point) {
  // Outer sweep: batch sizes 1, 4, ..., 19. Inner sweep: y_zero_point -128, -77, ..., 127.
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6719 
TEST(QS8_VADD_MINMAX__SCALAR_X4, a_scale) {
  // For each batch size 1, 4, ..., 19: grow a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6730 
TEST(QS8_VADD_MINMAX__SCALAR_X4, b_scale) {
  // For each batch size 1, 4, ..., 19: grow b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6741 
TEST(QS8_VADD_MINMAX__SCALAR_X4, y_scale) {
  // For each batch size 1, 4, ..., 19: grow y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6752 
TEST(QS8_VADD_MINMAX__SCALAR_X4, qmin) {
  // Batch sizes 1, 4, 7, 10, 13, 16, 19, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}
6761 
TEST(QS8_VADD_MINMAX__SCALAR_X4, qmax) {
  // Batch sizes 1, 4, 7, 10, 13, 16, 19, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4, xnn_init_qs8_add_minmax_scalar_params);
  }
}