xref: /aosp_15_r20/external/XNNPACK/test/qu8-vadd-minmax.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/qu8-vadd-minmax.yaml
8 //   Generator: tools/generate-vbinary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vadd.h>
18 #include "vadd-microkernel-tester.h"
19 
20 
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 8.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
      xnn_init_qu8_add_minmax_neon_params);
}
28 
// Sweeps batch_size over 16, 24, ..., 72 (multiples of 8 above 8).
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
37 
// Sweeps batch_size over every value from 1 through 7.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
46 
// Sweeps batch_size over every value from 9 through 15.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
55 
// Sweeps batch_size over 1, 8, ..., 36 with inplace_a enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
65 
// Sweeps batch_size over 1, 8, ..., 36 with inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
75 
// Sweeps batch_size over 1, 8, ..., 36 with both inplace_a and inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
86 
// For batch_size in 1, 8, ..., 36, sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
98 
// For batch_size in 1, 8, ..., 36, sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
110 
// For batch_size in 1, 8, ..., 36, sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
122 
// For batch_size in 1, 8, ..., 36, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
134 
// For batch_size in 1, 8, ..., 36, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
146 
// For batch_size in 1, 8, ..., 36, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
158 
// Sweeps batch_size over 1, 8, ..., 36 with qmin set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
168 
// Sweeps batch_size over 1, 8, ..., 36 with qmax set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
178 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
179 
180 
181 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 16.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
      xnn_init_qu8_add_minmax_neon_params);
}
188 
// Sweeps batch_size over 32, 48, ..., 144 (multiples of 16 above 16).
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
197 
// Sweeps batch_size over every value from 1 through 15.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
206 
// Sweeps batch_size over every value from 17 through 31.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
215 
// Sweeps batch_size over 1, 16, ..., 76 with inplace_a enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
225 
// Sweeps batch_size over 1, 16, ..., 76 with inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
235 
// Sweeps batch_size over 1, 16, ..., 76 with both inplace_a and inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
246 
// For batch_size in 1, 16, ..., 76, sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
258 
// For batch_size in 1, 16, ..., 76, sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
270 
// For batch_size in 1, 16, ..., 76, sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
282 
// For batch_size in 1, 16, ..., 76, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
294 
// For batch_size in 1, 16, ..., 76, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
306 
// For batch_size in 1, 16, ..., 76, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
318 
// Sweeps batch_size over 1, 16, ..., 76 with qmin set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
328 
// Sweeps batch_size over 1, 16, ..., 76 with qmax set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
338 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
339 
340 
341 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 32.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester().batch_size(32).Test(
      xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
      xnn_init_qu8_add_minmax_neon_params);
}
348 
// Sweeps batch_size over 64, 96, ..., 288 (multiples of 32 above 32).
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
357 
// Sweeps batch_size over every value from 1 through 31.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
366 
// Sweeps batch_size over every value from 33 through 63.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
375 
// Sweeps batch_size over 1, 32, ..., 156 with inplace_a enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
385 
// Sweeps batch_size over 1, 32, ..., 156 with inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
395 
// Sweeps batch_size over 1, 32, ..., 156 with both inplace_a and inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
406 
// For batch_size in 1, 32, ..., 156, sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
418 
// For batch_size in 1, 32, ..., 156, sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
430 
// For batch_size in 1, 32, ..., 156, sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
442 
// For batch_size in 1, 32, ..., 156, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
454 
// For batch_size in 1, 32, ..., 156, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
466 
// For batch_size in 1, 32, ..., 156, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
478 
// Sweeps batch_size over 1, 32, ..., 156 with qmin set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
488 
// Sweeps batch_size over 1, 32, ..., 156 with qmax set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD64_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
498 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
499 
500 
501 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 16.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
      xnn_init_qu8_add_minmax_neon_params);
}
508 
// Sweeps batch_size over 32, 48, ..., 144 (multiples of 16 above 16).
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
517 
// Sweeps batch_size over every value from 1 through 15.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
526 
// Sweeps batch_size over every value from 17 through 31.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
535 
// Sweeps batch_size over 1, 16, ..., 76 with inplace_a enabled.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
545 
// Sweeps batch_size over 1, 16, ..., 76 with inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
555 
// Sweeps batch_size over 1, 16, ..., 76 with both inplace_a and inplace_b enabled.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
566 
// For batch_size in 1, 16, ..., 76, sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
578 
// For batch_size in 1, 16, ..., 76, sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
590 
// For batch_size in 1, 16, ..., 76, sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
602 
// For batch_size in 1, 16, ..., 76, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
614 
// For batch_size in 1, 16, ..., 76, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
626 
// For batch_size in 1, 16, ..., 76, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
638 
// Sweeps batch_size over 1, 16, ..., 76 with qmin set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
648 
// Sweeps batch_size over 1, 16, ..., 76 with qmax set to 128.
TEST(QU8_VADD_MINMAX__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qu8_add_minmax_neon_params);
  }
}
658 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
659 
660 
661 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 8.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qu8_add_minmax_sse2_params);
}
668 
// Sweeps batch_size over 16, 24, ..., 72 (multiples of 8 above 8).
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
677 
// Sweeps batch_size over every value from 1 through 7.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
686 
// Sweeps batch_size over every value from 9 through 15.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
695 
// Sweeps batch_size over 1, 8, ..., 36 with inplace_a enabled.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
705 
// Sweeps batch_size over 1, 8, ..., 36 with inplace_b enabled.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
715 
// Sweeps batch_size over 1, 8, ..., 36 with both inplace_a and inplace_b enabled.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
726 
// For batch_size in 1, 8, ..., 36, sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
738 
// For batch sizes 1, 8, 15, 22, 29, 36, passes b_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE2 x8 kernel.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
750 
// For batch sizes 1, 8, 15, 22, 29, 36, passes y_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE2 x8 kernel.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
762 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
774 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
786 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
798 
// Runs the SSE2 x8 kernel with qmin(128) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
808 
// Runs the SSE2 x8 kernel with qmax(128) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
818 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
819 
820 
821 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE2 x16 kernel once with a batch size of exactly 16.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qu8_add_minmax_sse2_params);
}
828 
// Runs the SSE2 x16 kernel for batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
837 
// Runs the SSE2 x16 kernel for every batch size from 1 through 15.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
846 
// Runs the SSE2 x16 kernel for every batch size from 17 through 31.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
855 
// Runs the SSE2 x16 kernel with inplace_a(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
865 
// Runs the SSE2 x16 kernel with inplace_b(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
875 
// Runs the SSE2 x16 kernel with both inplace_a(true) and inplace_b(true) for
// batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
886 
// For batch sizes 1, 16, 31, 46, 61, 76, passes a_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE2 x16 kernel.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
898 
// For batch sizes 1, 16, 31, 46, 61, 76, passes b_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE2 x16 kernel.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
910 
// For batch sizes 1, 16, 31, 46, 61, 76, passes y_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE2 x16 kernel.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
922 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
934 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
946 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
958 
// Runs the SSE2 x16 kernel with qmin(128) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
968 
// Runs the SSE2 x16 kernel with qmax(128) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
978 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
979 
980 
981 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE4.1 x8 kernel once with a batch size of exactly 8.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qu8_add_minmax_sse2_params);
}
988 
// Runs the SSE4.1 x8 kernel for batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
997 
// Runs the SSE4.1 x8 kernel for every batch size from 1 through 7.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1006 
// Runs the SSE4.1 x8 kernel for every batch size from 9 through 15.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1015 
// Runs the SSE4.1 x8 kernel with inplace_a(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1025 
// Runs the SSE4.1 x8 kernel with inplace_b(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1035 
// Runs the SSE4.1 x8 kernel with both inplace_a(true) and inplace_b(true) for
// batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1046 
// For batch sizes 1, 8, 15, 22, 29, 36, passes a_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE4.1 x8 kernel.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1058 
// For batch sizes 1, 8, 15, 22, 29, 36, passes b_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE4.1 x8 kernel.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1070 
// For batch sizes 1, 8, 15, 22, 29, 36, passes y_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE4.1 x8 kernel.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1082 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1094 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1106 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1118 
// Runs the SSE4.1 x8 kernel with qmin(128) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1128 
// Runs the SSE4.1 x8 kernel with qmax(128) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1138 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139 
1140 
1141 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE4.1 x16 kernel once with a batch size of exactly 16.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
      xnn_init_qu8_add_minmax_sse2_params);
}
1148 
// Runs the SSE4.1 x16 kernel for batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1157 
// Runs the SSE4.1 x16 kernel for every batch size from 1 through 15.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1166 
// Runs the SSE4.1 x16 kernel for every batch size from 17 through 31.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1175 
// Runs the SSE4.1 x16 kernel with inplace_a(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1185 
// Runs the SSE4.1 x16 kernel with inplace_b(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1195 
// Runs the SSE4.1 x16 kernel with both inplace_a(true) and inplace_b(true) for
// batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1206 
// For batch sizes 1, 16, 31, 46, 61, 76, passes a_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE4.1 x16 kernel.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1218 
// For batch sizes 1, 16, 31, 46, 61, 76, passes b_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE4.1 x16 kernel.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1230 
// For batch sizes 1, 16, 31, 46, 61, 76, passes y_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the SSE4.1 x16 kernel.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1242 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1254 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1266 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1278 
// Runs the SSE4.1 x16 kernel with qmin(128) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1288 
// Runs the SSE4.1 x16 kernel with qmax(128) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1298 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299 
1300 
1301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX x8 kernel once with a batch size of exactly 8.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
      xnn_init_qu8_add_minmax_sse2_params);
}
1308 
// Runs the AVX x8 kernel for batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1317 
// Runs the AVX x8 kernel for every batch size from 1 through 7.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1326 
// Runs the AVX x8 kernel for every batch size from 9 through 15.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1335 
// Runs the AVX x8 kernel with inplace_a(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1345 
// Runs the AVX x8 kernel with inplace_b(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1355 
// Runs the AVX x8 kernel with both inplace_a(true) and inplace_b(true) for
// batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1366 
// For batch sizes 1, 8, 15, 22, 29, 36, passes a_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the AVX x8 kernel.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1378 
// For batch sizes 1, 8, 15, 22, 29, 36, passes b_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the AVX x8 kernel.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1390 
// For batch sizes 1, 8, 15, 22, 29, 36, passes y_zero_point values
// -128, -77, -26, 25, 76, 127 to the tester and runs the AVX x8 kernel.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1402 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1414 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1426 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_scale geometrically: starts
// at 0.1f and multiplies by 3.14f for as long as the value stays <= 10.0f.
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1438 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..40, step 7) with qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1448 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..40, step 7) with qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1458 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459 
1460 
1461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  // Single run with batch_size equal to 16.
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
}
1468 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 16, from 32 up to (but not including) 160.
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1477 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  // Every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1486 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  // Every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1495 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..80, step 15) with inplace_a enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1505 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..80, step 15) with inplace_b enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1515 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..80, step 15) with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1526 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Pair each sampled batch size (1..80, step 15) with each sampled a_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1538 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Pair each sampled batch size (1..80, step 15) with each sampled b_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1550 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Pair each sampled batch size (1..80, step 15) with each sampled y_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1562 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  // For each sampled batch size, grow a_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1574 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  // For each sampled batch size, grow b_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1586 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  // For each sampled batch size, grow y_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1598 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..80, step 15) with qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1608 
TEST(QU8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..80, step 15) with qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
1618 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619 
1620 
1621 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with batch_size equal to 8.
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
}
1628 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of 8, from 16 up to (but not including) 80.
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1637 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1646 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1655 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..40, step 7) with inplace_a enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1665 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..40, step 7) with inplace_b enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1675 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..40, step 7) with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1686 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Pair each sampled batch size (1..40, step 7) with each sampled a_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1698 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Pair each sampled batch size (1..40, step 7) with each sampled b_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1710 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Pair each sampled batch size (1..40, step 7) with each sampled y_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1722 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For each sampled batch size, grow a_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1734 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For each sampled batch size, grow b_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1746 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For each sampled batch size, grow y_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1758 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..40, step 7) with qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1768 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..40, step 7) with qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1778 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779 
1780 
1781 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with batch_size equal to 16.
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
}
1788 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  // Multiples of 16, from 32 up to (but not including) 160.
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1797 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1806 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1815 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..80, step 15) with inplace_a enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1825 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..80, step 15) with inplace_b enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1835 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..80, step 15) with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1846 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Pair each sampled batch size (1..80, step 15) with each sampled a_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1858 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Pair each sampled batch size (1..80, step 15) with each sampled b_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1870 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Pair each sampled batch size (1..80, step 15) with each sampled y_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1882 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For each sampled batch size, grow a_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1894 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For each sampled batch size, grow b_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1906 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For each sampled batch size, grow y_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1918 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..80, step 15) with qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1928 
TEST(QU8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Sampled batch sizes (1..80, step 15) with qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1938 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939 
1940 
1941 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run with batch_size equal to 8.
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
}
1948 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 8, from 16 up to (but not including) 80.
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1957 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1966 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1975 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..40, step 7) with inplace_a enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1985 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..40, step 7) with inplace_b enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1995 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..40, step 7) with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2006 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Pair each sampled batch size (1..40, step 7) with each sampled a_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2018 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Pair each sampled batch size (1..40, step 7) with each sampled b_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2030 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // Pair each sampled batch size (1..40, step 7) with each sampled y_zero_point (-128..127, step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2042 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  // For each sampled batch size, grow a_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2054 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  // For each sampled batch size, grow b_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2066 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  // For each sampled batch size, grow y_scale geometrically from 0.1 by a factor of 3.14 while it is <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2078 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..40, step 7) with qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2088 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Sampled batch sizes (1..40, step 7) with qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2098 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2099 
2100 
2101 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  // Single run with batch_size equal to 16.
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
}
2108 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  // Multiples of 16, from 32 up to (but not including) 160.
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2117 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  // Every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2126 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  // Every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2135 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2145 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2155 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2166 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2178 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2190 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2202 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2214 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2226 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2238 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with qmin(128) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2248 
TEST(QU8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with qmax(128) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2258 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259 
2260 
2261 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single run on a batch of exactly 8 elements; no other tester setting is changed.
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qu8_add_minmax_sse4_params);
}
2268 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 16, 24, ..., 72: multiples of 8 from 16 up to (not including) 80.
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2277 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Every batch size from 1 through 7, i.e. all sizes below 8.
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2286 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Every batch size from 9 through 15, i.e. sizes strictly between 8 and 16.
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2295 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2305 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2315 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2326 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2338 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2350 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2362 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2374 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2386 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2398 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with qmin(128) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2408 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with qmax(128) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2418 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2419 
2420 
2421 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_XOP;
  // Single run on a batch of exactly 16 elements; no other tester setting is changed.
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qu8_add_minmax_sse4_params);
}
2428 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 32, 48, ..., 144: multiples of 16 from 32 up to (not including) 160.
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2437 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every batch size from 1 through 15, i.e. all sizes below 16.
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2446 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_XOP;
  // Every batch size from 17 through 31, i.e. sizes strictly between 16 and 32.
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2455 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2465 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2475 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2486 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2498 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2510 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2522 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2534 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2546 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_XOP;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2558 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with qmin(128) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2568 
TEST(QU8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with qmax(128) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
2578 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2579 
2580 
2581 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 8 elements; no other tester setting is changed.
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
      xnn_init_qu8_add_minmax_avx2_params);
}
2588 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 16, 24, ..., 72: multiples of 8 from 16 up to (not including) 80.
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2597 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 7, i.e. all sizes below 8.
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2606 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 9 through 15, i.e. sizes strictly between 8 and 16.
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2615 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2625 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2635 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2646 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2658 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2670 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2682 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2694 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2706 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_scale from 0.1f upward,
  // multiplying by 3.14f each step while the value stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2718 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with qmin(128) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2728 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, 15, 22, 29, 36, each run with qmax(128) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2738 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2739 
2740 
2741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 16 elements; no other tester setting is changed.
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
      xnn_init_qu8_add_minmax_avx2_params);
}
2748 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 32, 48, ..., 144: multiples of 16 from 32 up to (not including) 160.
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2757 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 15, i.e. all sizes below 16.
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2766 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 17 through 31, i.e. sizes strictly between 16 and 32.
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2775 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2785 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2795 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, 31, 46, 61, 76, each run with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
  }
}
2806 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2818 
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
  // NOTE(review): signed values in a QU8 test; how the tester converts them is not visible here - confirm.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16,
          xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2830 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 16, 31, 46, 61, 76 on the AVX2 mul32/ld64 x16 kernel.
// Gated by TEST_REQUIRES_X86_AVX2.
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(kernel, init_params);
    }
  }
}
2842 
// Sweeps a_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the AVX2
// mul32/ld64 x16 kernel. Gated by TEST_REQUIRES_X86_AVX2.
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(kernel, init_params);
    }
  }
}
2854 
// Sweeps b_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the AVX2
// mul32/ld64 x16 kernel. Gated by TEST_REQUIRES_X86_AVX2.
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(kernel, init_params);
    }
  }
}
2866 
// Sweeps y_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the AVX2
// mul32/ld64 x16 kernel. Gated by TEST_REQUIRES_X86_AVX2.
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(kernel, init_params);
    }
  }
}
2878 
// Runs the AVX2 mul32/ld64 x16 kernel with qmin(128) for batch sizes
// 1, 16, 31, 46, 61 and 76. Gated by TEST_REQUIRES_X86_AVX2.
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(kernel, init_params);
  }
}
2888 
// Runs the AVX2 mul32/ld64 x16 kernel with qmax(128) for batch sizes
// 1, 16, 31, 46, 61 and 76. Gated by TEST_REQUIRES_X86_AVX2.
TEST(QU8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(kernel, init_params);
  }
}
2898 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2899 
2900 
2901 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX512SKX mul32/ld128 x16 kernel once with a batch size of exactly 16.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  VAddMicrokernelTester().batch_size(16).Test(kernel, init_params);
}
2908 
// Runs the AVX512SKX mul32/ld128 x16 kernel on multiples of 16 from 32 up to
// (but excluding) 160. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
2917 
// Runs the AVX512SKX mul32/ld128 x16 kernel on every batch size from 1 to 15.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n < 16; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
2926 
// Runs the AVX512SKX mul32/ld128 x16 kernel on every batch size from 17 to 31.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 17; n < 32; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
2935 
// Runs the AVX512SKX mul32/ld128 x16 kernel with inplace_a(true) for batch
// sizes 1, 16, 31, 46, 61 and 76. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(kernel, init_params);
  }
}
2945 
// Runs the AVX512SKX mul32/ld128 x16 kernel with inplace_b(true) for batch
// sizes 1, 16, 31, 46, 61 and 76. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(kernel, init_params);
  }
}
2955 
// Runs the AVX512SKX mul32/ld128 x16 kernel with both inplace_a(true) and
// inplace_b(true) for batch sizes 1, 16, 31, 46, 61 and 76.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(kernel, init_params);
  }
}
2966 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 16, 31, 46, 61, 76 on the AVX512SKX mul32/ld128 x16 kernel.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(kernel, init_params);
    }
  }
}
2978 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 16, 31, 46, 61, 76 on the AVX512SKX mul32/ld128 x16 kernel.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(kernel, init_params);
    }
  }
}
2990 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 16, 31, 46, 61, 76 on the AVX512SKX mul32/ld128 x16 kernel.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3002 
// Sweeps a_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the AVX512SKX
// mul32/ld128 x16 kernel. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(kernel, init_params);
    }
  }
}
3014 
// Sweeps b_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the AVX512SKX
// mul32/ld128 x16 kernel. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(kernel, init_params);
    }
  }
}
3026 
// Sweeps y_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the AVX512SKX
// mul32/ld128 x16 kernel. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(kernel, init_params);
    }
  }
}
3038 
// Runs the AVX512SKX mul32/ld128 x16 kernel with qmin(128) for batch sizes
// 1, 16, 31, 46, 61 and 76. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(kernel, init_params);
  }
}
3048 
// Runs the AVX512SKX mul32/ld128 x16 kernel with qmax(128) for batch sizes
// 1, 16, 31, 46, 61 and 76. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(kernel, init_params);
  }
}
3058 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3059 
3060 
3061 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX512SKX mul32/ld128 x32 kernel once with a batch size of exactly 32.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  VAddMicrokernelTester().batch_size(32).Test(kernel, init_params);
}
3068 
// Runs the AVX512SKX mul32/ld128 x32 kernel on multiples of 32 from 64 up to
// (but excluding) 320. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3077 
// Runs the AVX512SKX mul32/ld128 x32 kernel on every batch size from 1 to 31.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n < 32; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3086 
// Runs the AVX512SKX mul32/ld128 x32 kernel on every batch size from 33 to 63.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 33; n < 64; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3095 
// Runs the AVX512SKX mul32/ld128 x32 kernel with inplace_a(true) for batch
// sizes 1, 32, 63, 94, 125 and 156. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(kernel, init_params);
  }
}
3105 
// Runs the AVX512SKX mul32/ld128 x32 kernel with inplace_b(true) for batch
// sizes 1, 32, 63, 94, 125 and 156. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(kernel, init_params);
  }
}
3115 
// Runs the AVX512SKX mul32/ld128 x32 kernel with both inplace_a(true) and
// inplace_b(true) for batch sizes 1, 32, 63, 94, 125 and 156.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(kernel, init_params);
  }
}
3126 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 32, 63, 94, 125, 156 on the AVX512SKX mul32/ld128 x32 kernel.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3138 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 32, 63, 94, 125, 156 on the AVX512SKX mul32/ld128 x32 kernel.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3150 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 32, 63, 94, 125, 156 on the AVX512SKX mul32/ld128 x32 kernel.
// Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3162 
// Sweeps a_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 32, 63, 94, 125, 156 on the AVX512SKX
// mul32/ld128 x32 kernel. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(kernel, init_params);
    }
  }
}
3174 
// Sweeps b_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 32, 63, 94, 125, 156 on the AVX512SKX
// mul32/ld128 x32 kernel. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(kernel, init_params);
    }
  }
}
3186 
// Sweeps y_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 32, 63, 94, 125, 156 on the AVX512SKX
// mul32/ld128 x32 kernel. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(kernel, init_params);
    }
  }
}
3198 
// Runs the AVX512SKX mul32/ld128 x32 kernel with qmin(128) for batch sizes
// 1, 32, 63, 94, 125 and 156. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(kernel, init_params);
  }
}
3208 
// Runs the AVX512SKX mul32/ld128 x32 kernel with qmax(128) for batch sizes
// 1, 32, 63, 94, 125 and 156. Gated by TEST_REQUIRES_X86_AVX512SKX.
TEST(QU8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32;
  const auto init_params = xnn_init_qu8_add_minmax_avx512_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(kernel, init_params);
  }
}
3218 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3219 
3220 
3221 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the WAsm SIMD x8 kernel once with a batch size of exactly 8.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_eq_8) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  VAddMicrokernelTester().batch_size(8).Test(kernel, init_params);
}
3227 
// Runs the WAsm SIMD x8 kernel on multiples of 8 from 16 up to (but
// excluding) 80.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_div_8) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3235 
// Runs the WAsm SIMD x8 kernel on every batch size from 1 to 7.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_lt_8) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n < 8; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3243 
// Runs the WAsm SIMD x8 kernel on every batch size from 9 to 15.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, batch_gt_8) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 9; n < 16; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3251 
// Runs the WAsm SIMD x8 kernel with inplace_a(true) for batch sizes
// 1, 8, 15, 22, 29 and 36.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_a) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(kernel, init_params);
  }
}
3260 
// Runs the WAsm SIMD x8 kernel with inplace_b(true) for batch sizes
// 1, 8, 15, 22, 29 and 36.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_b) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(kernel, init_params);
  }
}
3269 
// Runs the WAsm SIMD x8 kernel with both inplace_a(true) and inplace_b(true)
// for batch sizes 1, 8, 15, 22, 29 and 36.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, inplace_a_and_b) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(kernel, init_params);
  }
}
3279 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 8, 15, 22, 29, 36 on the WAsm SIMD x8 kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, a_zero_point) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3290 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 8, 15, 22, 29, 36 on the WAsm SIMD x8 kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, b_zero_point) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3301 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 8, 15, 22, 29, 36 on the WAsm SIMD x8 kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, y_zero_point) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3312 
// Sweeps a_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 8, 15, 22, 29, 36 on the WAsm SIMD x8
// kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, a_scale) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(kernel, init_params);
    }
  }
}
3323 
// Sweeps b_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 8, 15, 22, 29, 36 on the WAsm SIMD x8
// kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, b_scale) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(kernel, init_params);
    }
  }
}
3334 
// Sweeps y_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 8, 15, 22, 29, 36 on the WAsm SIMD x8
// kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, y_scale) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(kernel, init_params);
    }
  }
}
3345 
// Runs the WAsm SIMD x8 kernel with qmin(128) for batch sizes
// 1, 8, 15, 22, 29 and 36.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, qmin) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(kernel, init_params);
  }
}
3354 
// Runs the WAsm SIMD x8 kernel with qmax(128) for batch sizes
// 1, 8, 15, 22, 29 and 36.
TEST(QU8_VADD_MINMAX__WASMSIMD_X8, qmax) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(kernel, init_params);
  }
}
3363 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3364 
3365 
3366 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the WAsm SIMD x16 kernel once with a batch size of exactly 16.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_eq_16) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  VAddMicrokernelTester().batch_size(16).Test(kernel, init_params);
}
3372 
// Runs the WAsm SIMD x16 kernel on multiples of 16 from 32 up to (but
// excluding) 160.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_div_16) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3380 
// Runs the WAsm SIMD x16 kernel on every batch size from 1 to 15.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_lt_16) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n < 16; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3388 
// Runs the WAsm SIMD x16 kernel on every batch size from 17 to 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, batch_gt_16) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 17; n < 32; n++) {
    VAddMicrokernelTester().batch_size(n).Test(kernel, init_params);
  }
}
3396 
// Runs the WAsm SIMD x16 kernel with inplace_a(true) for batch sizes
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_a) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(kernel, init_params);
  }
}
3405 
// Runs the WAsm SIMD x16 kernel with inplace_b(true) for batch sizes
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_b) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(kernel, init_params);
  }
}
3414 
// Runs the WAsm SIMD x16 kernel with both inplace_a(true) and inplace_b(true)
// for batch sizes 1, 16, 31, 46, 61 and 76.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, inplace_a_and_b) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(kernel, init_params);
  }
}
3424 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 16, 31, 46, 61, 76 on the WAsm SIMD x16 kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, a_zero_point) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3435 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 16, 31, 46, 61, 76 on the WAsm SIMD x16 kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, b_zero_point) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3446 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each batch size in
// 1, 16, 31, 46, 61, 76 on the WAsm SIMD x16 kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, y_zero_point) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(kernel, init_params);
    }
  }
}
3457 
// Sweeps a_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the WAsm SIMD x16
// kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, a_scale) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(kernel, init_params);
    }
  }
}
3468 
// Sweeps b_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the WAsm SIMD x16
// kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, b_scale) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(kernel, init_params);
    }
  }
}
3479 
// Sweeps y_scale geometrically (starting at 0.1f, multiplied by 3.14f while
// <= 10.0f) for each batch size in 1, 16, 31, 46, 61, 76 on the WAsm SIMD x16
// kernel.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, y_scale) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(kernel, init_params);
    }
  }
}
3490 
// Runs the WAsm SIMD x16 kernel with qmin(128) for batch sizes
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, qmin) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(kernel, init_params);
  }
}
3499 
// Runs the WAsm SIMD x16 kernel with qmax(128) for batch sizes
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_VADD_MINMAX__WASMSIMD_X16, qmax) {
  const auto kernel = xnn_qu8_vadd_minmax_ukernel__wasmsimd_x16;
  const auto init_params = xnn_init_qu8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(kernel, init_params);
  }
}
3508 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3509 
3510 
3511 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the wasmsimd_x32 kernel once with a batch of exactly 32 elements.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_eq_32) {
  const size_t n = 32;
  VAddMicrokernelTester().batch_size(n)
    .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
}
3517 
// Runs the wasmsimd_x32 kernel on multiples of 32 from 64 up to (but excluding) 320.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_div_32) {
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3525 
// Runs the wasmsimd_x32 kernel on every batch size from 1 through 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_lt_32) {
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3533 
// Runs the wasmsimd_x32 kernel on every batch size from 33 through 63.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, batch_gt_32) {
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3541 
// Runs the wasmsimd_x32 kernel with inplace_a enabled for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, inplace_a) {
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3550 
// Runs the wasmsimd_x32 kernel with inplace_b enabled for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, inplace_b) {
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3559 
// Runs the wasmsimd_x32 kernel with both inplace_a and inplace_b enabled
// for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, inplace_a_and_b) {
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3569 
// Sweeps a_zero_point from -128 to 127 in steps of 51 for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, a_zero_point) {
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
  }
}
3580 
// Sweeps b_zero_point from -128 to 127 in steps of 51 for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, b_zero_point) {
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
  }
}
3591 
// Sweeps y_zero_point from -128 to 127 in steps of 51 for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, y_zero_point) {
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
  }
}
3602 
// Sweeps a_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, a_scale) {
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
  }
}
3613 
// Sweeps b_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, b_scale) {
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
  }
}
3624 
// Sweeps y_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, y_scale) {
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
  }
}
3635 
// Runs the wasmsimd_x32 kernel with qmin set to 128 for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, qmin) {
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3644 
// Runs the wasmsimd_x32 kernel with qmax set to 128 for batch sizes 1..160 in steps of 31.
TEST(QU8_VADD_MINMAX__WASMSIMD_X32, qmax) {
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32, xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3653 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3654 
3655 
// Runs the scalar_x1 kernel once with a batch of exactly 1 element.
TEST(QU8_VADD_MINMAX__SCALAR_X1, batch_eq_1) {
  const size_t n = 1;
  VAddMicrokernelTester().batch_size(n)
    .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
}
3661 
// Runs the scalar_x1 kernel on every batch size from 2 through 9.
TEST(QU8_VADD_MINMAX__SCALAR_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3669 
// Runs the scalar_x1 kernel with inplace_a enabled for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_a) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3678 
// Runs the scalar_x1 kernel with inplace_b enabled for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_b) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3687 
// Runs the scalar_x1 kernel with both inplace_a and inplace_b enabled for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, inplace_a_and_b) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3697 
// Sweeps a_zero_point from -128 to 127 in steps of 51 for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, a_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
3708 
// Sweeps b_zero_point from -128 to 127 in steps of 51 for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, b_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
3719 
// Sweeps y_zero_point from -128 to 127 in steps of 51 for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, y_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
3730 
// Sweeps a_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, a_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
3741 
// Sweeps b_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, b_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
3752 
// Sweeps y_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, y_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
3763 
// Runs the scalar_x1 kernel with qmin set to 128 for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, qmin) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3772 
// Runs the scalar_x1 kernel with qmax set to 128 for batch sizes 1 through 5.
TEST(QU8_VADD_MINMAX__SCALAR_X1, qmax) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x1, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3781 
// Runs the scalar_x2 kernel once with a batch of exactly 2 elements.
TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_eq_2) {
  const size_t n = 2;
  VAddMicrokernelTester().batch_size(n)
    .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
}
3787 
// Runs the scalar_x2 kernel on even batch sizes from 4 up to (but excluding) 20.
TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3795 
// Runs the scalar_x2 kernel on batch sizes below 2; the loop covers only batch size 1.
TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3803 
// Runs the scalar_x2 kernel on batch sizes in [3, 4); the loop covers only batch size 3.
TEST(QU8_VADD_MINMAX__SCALAR_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3811 
// Runs the scalar_x2 kernel with inplace_a enabled for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_a) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3820 
// Runs the scalar_x2 kernel with inplace_b enabled for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_b) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3829 
// Runs the scalar_x2 kernel with both inplace_a and inplace_b enabled for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, inplace_a_and_b) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3839 
// Sweeps a_zero_point from -128 to 127 in steps of 51 for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, a_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
3850 
// Sweeps b_zero_point from -128 to 127 in steps of 51 for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, b_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
3861 
// Sweeps y_zero_point from -128 to 127 in steps of 51 for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, y_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
3872 
// Sweeps a_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, a_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
3883 
// Sweeps b_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, b_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
3894 
// Sweeps y_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, y_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
3905 
// Runs the scalar_x2 kernel with qmin set to 128 for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, qmin) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3914 
// Runs the scalar_x2 kernel with qmax set to 128 for batch sizes 1 through 10.
TEST(QU8_VADD_MINMAX__SCALAR_X2, qmax) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x2, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3923 
// Runs the scalar_x4 kernel once with a batch of exactly 4 elements.
TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_eq_4) {
  const size_t n = 4;
  VAddMicrokernelTester().batch_size(n)
    .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
}
3929 
// Runs the scalar_x4 kernel on multiples of 4 from 8 up to (but excluding) 40.
TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3937 
// Runs the scalar_x4 kernel on every batch size from 1 through 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3945 
// Runs the scalar_x4 kernel on every batch size from 5 through 7.
TEST(QU8_VADD_MINMAX__SCALAR_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3953 
// Runs the scalar_x4 kernel with inplace_a enabled for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_a) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3962 
// Runs the scalar_x4 kernel with inplace_b enabled for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_b) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3971 
// Runs the scalar_x4 kernel with both inplace_a and inplace_b enabled
// for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, inplace_a_and_b) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}
3981 
// Sweeps a_zero_point from -128 to 127 in steps of 51 for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, a_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
3992 
// Sweeps b_zero_point from -128 to 127 in steps of 51 for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, b_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
4003 
// Sweeps y_zero_point from -128 to 127 in steps of 51 for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, y_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
      zp += 51;
    }
  }
}
4014 
// Sweeps a_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, a_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
4025 
// Sweeps b_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, b_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
4036 
// Sweeps y_scale (0.1, then repeatedly x3.14 while <= 10) for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, y_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
      scale *= 3.14f;
    }
  }
}
4047 
// Runs the scalar_x4 kernel with qmin set to 128 for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, qmin) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}
4056 
// Runs the scalar_x4 kernel with qmax set to 128 for batch sizes 1..20 in steps of 3.
TEST(QU8_VADD_MINMAX__SCALAR_X4, qmax) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qu8_vadd_minmax_ukernel__scalar_x4, xnn_init_qu8_add_minmax_scalar_params);
  }
}