1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/qs8-vadd-minmax.yaml
8 // Generator: tools/generate-vbinary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vadd.h>
18 #include "vadd-microkernel-tester.h"
19
20
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size fixed at 8.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester tester{};
  tester.batch_size(8).Test(
      xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
      xnn_init_qs8_add_minmax_neon_params);
}
28
// Sweeps batch_size over 16, 24, ... while it stays below 80.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
37
// Covers every batch_size from 1 through 7.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
46
// Covers every batch_size from 9 through 15.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
55
// Sweeps batch_size from 1 to 40 in steps of 7 with inplace_a(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
65
// Sweeps batch_size from 1 to 40 in steps of 7 with inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
75
// Sweeps batch_size from 1 to 40 in steps of 7 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
86
// For batch_size 1..40 (step 7), tries a_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
98
// For batch_size 1..40 (step 7), tries b_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
110
// For batch_size 1..40 (step 7), tries y_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
122
// For batch_size 1..40 (step 7), tries a_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
134
// For batch_size 1..40 (step 7), tries b_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
146
// For batch_size 1..40 (step 7), tries y_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
158
// Sweeps batch_size from 1 to 40 in steps of 7 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
168
// Sweeps batch_size from 1 to 40 in steps of 7 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
178 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
179
180
181 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size fixed at 16.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester tester{};
  tester.batch_size(16).Test(
      xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
      xnn_init_qs8_add_minmax_neon_params);
}
188
// Sweeps batch_size over 32, 48, ... while it stays below 160.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
197
// Covers every batch_size from 1 through 15.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
206
// Covers every batch_size from 17 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
215
// Sweeps batch_size from 1 to 80 in steps of 15 with inplace_a(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
225
// Sweeps batch_size from 1 to 80 in steps of 15 with inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
235
// Sweeps batch_size from 1 to 80 in steps of 15 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
246
// For batch_size 1..80 (step 15), tries a_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
258
// For batch_size 1..80 (step 15), tries b_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
270
// For batch_size 1..80 (step 15), tries y_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
282
// For batch_size 1..80 (step 15), tries a_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
294
// For batch_size 1..80 (step 15), tries b_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
306
// For batch_size 1..80 (step 15), tries y_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
318
// Sweeps batch_size from 1 to 80 in steps of 15 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
328
// Sweeps batch_size from 1 to 80 in steps of 15 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
338 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
339
340
341 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size fixed at 24.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester tester{};
  tester.batch_size(24).Test(
      xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
      xnn_init_qs8_add_minmax_neon_params);
}
348
// Sweeps batch_size over 48, 72, ... while it stays below 240.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
357
// Covers every batch_size from 1 through 23.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
366
// Covers every batch_size from 25 through 47.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
375
// Sweeps batch_size from 1 to 120 in steps of 23 with inplace_a(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
385
// Sweeps batch_size from 1 to 120 in steps of 23 with inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
395
// Sweeps batch_size from 1 to 120 in steps of 23 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
406
// For batch_size 1..120 (step 23), tries a_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
418
// For batch_size 1..120 (step 23), tries b_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
430
// For batch_size 1..120 (step 23), tries y_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
442
// For batch_size 1..120 (step 23), tries a_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
454
// For batch_size 1..120 (step 23), tries b_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
466
// For batch_size 1..120 (step 23), tries y_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
478
// Sweeps batch_size from 1 to 120 in steps of 23 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
488
// Sweeps batch_size from 1 to 120 in steps of 23 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X24, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
498 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
499
500
501 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size fixed at 32.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester tester{};
  tester.batch_size(32).Test(
      xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
      xnn_init_qs8_add_minmax_neon_params);
}
508
// Sweeps batch_size over 64, 96, ... while it stays below 320.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
517
// Covers every batch_size from 1 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
526
// Covers every batch_size from 33 through 63.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
535
// Sweeps batch_size from 1 to 160 in steps of 31 with inplace_a(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
545
// Sweeps batch_size from 1 to 160 in steps of 31 with inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
555
// Sweeps batch_size from 1 to 160 in steps of 31 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
566
// For batch_size 1..160 (step 31), tries a_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
578
// For batch_size 1..160 (step 31), tries b_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
590
// For batch_size 1..160 (step 31), tries y_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
602
// For batch_size 1..160 (step 31), tries a_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
614
// For batch_size 1..160 (step 31), tries b_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
626
// For batch_size 1..160 (step 31), tries y_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
638
// Sweeps batch_size from 1 to 160 in steps of 31 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
648
// Sweeps batch_size from 1 to 160 in steps of 31 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD64_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
658 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
659
660
661 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size fixed at 16.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester tester{};
  tester.batch_size(16).Test(
      xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
      xnn_init_qs8_add_minmax_neon_params);
}
668
// Sweeps batch_size over 32, 48, ... while it stays below 160.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
677
// Covers every batch_size from 1 through 15.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
686
// Covers every batch_size from 17 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
695
// Sweeps batch_size from 1 to 80 in steps of 15 with inplace_a(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
705
// Sweeps batch_size from 1 to 80 in steps of 15 with inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
715
// Sweeps batch_size from 1 to 80 in steps of 15 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
726
// For batch_size 1..80 (step 15), tries a_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
738
// For batch_size 1..80 (step 15), tries b_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
750
// For batch_size 1..80 (step 15), tries y_zero_point from -128 up to 127 in steps of 51.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
762
// For batch_size 1..80 (step 15), tries a_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
774
// For batch_size 1..80 (step 15), tries b_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
786
// For batch_size 1..80 (step 15), tries y_scale starting at 0.1f and multiplied by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
          xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
798
// Sweeps batch_size from 1 to 80 in steps of 15 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
808
// Sweeps batch_size from 1 to 80 in steps of 15 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
818 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
819
820
821 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size fixed at 32.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddMicrokernelTester tester{};
  tester.batch_size(32).Test(
      xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32,
      xnn_init_qs8_add_minmax_neon_params);
}
828
// Sweeps batch_size over 64, 96, ... while it stays below 320.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32,
        xnn_init_qs8_add_minmax_neon_params);
  }
}
837
// Covers every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 32; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
846
// Covers every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 33; n < 64; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
855
// Enables inplace_a for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
865
// Enables inplace_b for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
875
// Enables both inplace_a and inplace_b for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
886
// Crosses batch sizes 1..160 (step 31) with a_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
898
// Crosses batch sizes 1..160 (step 31) with b_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
910
// Crosses batch sizes 1..160 (step 31) with y_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
922
// Crosses batch sizes 1..160 (step 31) with a_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
934
// Crosses batch sizes 1..160 (step 31) with b_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
946
// Crosses batch sizes 1..160 (step 31) with y_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
958
// Sets qmin to 128 for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
968
// Sets qmax to 128 for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__NEON_LD128_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
978 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
979
980
981 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 8.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = VAddMicrokernelTester().batch_size(8);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
}
988
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 16; n < 80; n += 8) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
997
// Covers every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 8; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1006
// Covers every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n < 16; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1015
// Enables inplace_a for batch sizes 1..40 (step 7).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1025
// Enables inplace_b for batch sizes 1..40 (step 7).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1035
// Enables both inplace_a and inplace_b for batch sizes 1..40 (step 7).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1046
// Crosses batch sizes 1..40 (step 7) with a_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1058
// Crosses batch sizes 1..40 (step 7) with b_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1070
// Crosses batch sizes 1..40 (step 7) with y_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1082
// Crosses batch sizes 1..40 (step 7) with a_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1094
// Crosses batch sizes 1..40 (step 7) with b_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1106
// Crosses batch sizes 1..40 (step 7) with y_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1118
// Sets qmin to 128 for batch sizes 1..40 (step 7).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1128
// Sets qmax to 128 for batch sizes 1..40 (step 7).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1138 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139
1140
1141 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 16.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = VAddMicrokernelTester().batch_size(16);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
}
1148
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 32; n < 160; n += 16) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1157
// Covers every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 16; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1166
// Covers every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n < 32; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1175
// Enables inplace_a for batch sizes 1..80 (step 15).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1185
// Enables inplace_b for batch sizes 1..80 (step 15).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1195
// Enables both inplace_a and inplace_b for batch sizes 1..80 (step 15).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1206
// Crosses batch sizes 1..80 (step 15) with a_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1218
// Crosses batch sizes 1..80 (step 15) with b_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1230
// Crosses batch sizes 1..80 (step 15) with y_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1242
// Crosses batch sizes 1..80 (step 15) with a_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1254
// Crosses batch sizes 1..80 (step 15) with b_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1266
// Crosses batch sizes 1..80 (step 15) with y_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1278
// Sets qmin to 128 for batch sizes 1..80 (step 15).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1288
// Sets qmax to 128 for batch sizes 1..80 (step 15).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1298 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299
1300
1301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 24.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = VAddMicrokernelTester().batch_size(24);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
}
1308
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24 below 240).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 48; n < 240; n += 24) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1317
// Covers every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 24; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1326
// Covers every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 25; n < 48; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1335
// Enables inplace_a for batch sizes 1..120 (step 23).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1345
// Enables inplace_b for batch sizes 1..120 (step 23).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1355
// Enables both inplace_a and inplace_b for batch sizes 1..120 (step 23).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1366
// Crosses batch sizes 1..120 (step 23) with a_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1378
// Crosses batch sizes 1..120 (step 23) with b_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1390
// Crosses batch sizes 1..120 (step 23) with y_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1402
// Crosses batch sizes 1..120 (step 23) with a_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1414
// Crosses batch sizes 1..120 (step 23) with b_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1426
// Crosses batch sizes 1..120 (step 23) with y_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1438
// Sets qmin to 128 for batch sizes 1..120 (step 23).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1448
// Sets qmax to 128 for batch sizes 1..120 (step 23).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1458 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459
1460
1461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 32.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = VAddMicrokernelTester().batch_size(32);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
}
1468
// Sweeps batch sizes 64, 96, ..., 288 (multiples of 32 below 320).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 64; n < 320; n += 32) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1477
// Covers every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 32; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1486
// Covers every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 33; n < 64; ++n) {
    auto tester = VAddMicrokernelTester().batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1495
// Enables inplace_a for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1505
// Enables inplace_b for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1515
// Enables both inplace_a and inplace_b for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1526
// Crosses batch sizes 1..160 (step 31) with a_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1538
// Crosses batch sizes 1..160 (step 31) with b_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1550
// Crosses batch sizes 1..160 (step 31) with y_zero_point values -128..127 (step 51).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_zero_point(zero_point);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1562
// Crosses batch sizes 1..160 (step 31) with a_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1574
// Crosses batch sizes 1..160 (step 31) with b_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1586
// Crosses batch sizes 1..160 (step 31) with y_scale values that start at 0.1
// and grow by a factor of 3.14 while they stay <= 10.
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester().batch_size(n).y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1598
// Sets qmin to 128 for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1608
// Sets qmax to 128 for batch sizes 1..160 (step 31).
TEST(QS8_VADD_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester().batch_size(n).qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1618 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619
1620
1621 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size equal to 8.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  const size_t batch = 8;
  VAddMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1628
// Batch sizes that are whole multiples of 8: 16, 24, ..., 72.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VAddMicrokernelTester()
      .batch_size(multiple * 8)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1637
// Every batch size strictly below 8 (1 through 7).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  size_t batch = 1;
  while (batch < 8) {
    VAddMicrokernelTester()
      .batch_size(batch)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    batch++;
  }
}
1646
// Batch sizes strictly between 8 and 16 (9 through 15).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 8; excess++) {
    VAddMicrokernelTester()
      .batch_size(8 + excess)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1655
// Runs with inplace_a(true) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
1665
// Runs with inplace_b(true) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
1675
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
1686
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
1698
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
1710
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
1722
// Sweeps a_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
1734
// Sweeps b_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
1746
// Sweeps y_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
1758
// Runs with qmin(128) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
1768
// Runs with qmax(128) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
1778 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780
1781 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size equal to 16.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  const size_t batch = 16;
  VAddMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1788
// Batch sizes that are whole multiples of 16: 32, 48, ..., 144.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VAddMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1797
// Every batch size strictly below 16 (1 through 15).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  size_t batch = 1;
  while (batch < 16) {
    VAddMicrokernelTester()
      .batch_size(batch)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    batch++;
  }
}
1806
// Batch sizes strictly between 16 and 32 (17 through 31).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 16; excess++) {
    VAddMicrokernelTester()
      .batch_size(16 + excess)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1815
// Runs with inplace_a(true) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
1825
// Runs with inplace_b(true) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
1835
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
1846
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
1858
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
1870
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
1882
// Sweeps a_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
1894
// Sweeps b_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
1906
// Sweeps y_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
1918
// Runs with qmin(128) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
1928
// Runs with qmax(128) for batch sizes 1, 16, ..., 76.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
1938 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939
1940
1941 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size equal to 24.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  const size_t batch = 24;
  VAddMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1948
// Batch sizes that are whole multiples of 24: 48, 72, ..., 216.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VAddMicrokernelTester()
      .batch_size(multiple * 24)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1957
// Every batch size strictly below 24 (1 through 23).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  size_t batch = 1;
  while (batch < 24) {
    VAddMicrokernelTester()
      .batch_size(batch)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    batch++;
  }
}
1966
// Batch sizes strictly between 24 and 48 (25 through 47).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 24; excess++) {
    VAddMicrokernelTester()
      .batch_size(24 + excess)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1975
// Runs with inplace_a(true) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
1985
// Runs with inplace_b(true) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
1995
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
2006
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2018
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2030
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2042
// Sweeps a_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
2054
// Sweeps b_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
2066
// Sweeps y_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
2078
// Runs with qmin(128) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
2088
// Runs with qmax(128) for batch sizes 1, 24, ..., 116.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
2098 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2099
2100
2101 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size equal to 32.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE41;
  const size_t batch = 32;
  VAddMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2108
// Batch sizes that are whole multiples of 32: 64, 96, ..., 288.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VAddMicrokernelTester()
      .batch_size(multiple * 32)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2117
// Every batch size strictly below 32 (1 through 31).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  size_t batch = 1;
  while (batch < 32) {
    VAddMicrokernelTester()
      .batch_size(batch)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    batch++;
  }
}
2126
// Batch sizes strictly between 32 and 64 (33 through 63).
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 32; excess++) {
    VAddMicrokernelTester()
      .batch_size(32 + excess)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2135
// Runs with inplace_a(true) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
2145
// Runs with inplace_b(true) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
2155
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
2166
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2178
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2190
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2202
// Sweeps a_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
2214
// Sweeps b_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
2226
// Sweeps y_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
2238
// Runs with qmin(128) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
2248
// Runs with qmax(128) for batch sizes 1, 32, ..., 156.
TEST(QS8_VADD_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
2258 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259
2260
2261 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size equal to 8.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const size_t batch = 8;
  VAddMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2268
// Batch sizes that are whole multiples of 8: 16, 24, ..., 72.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VAddMicrokernelTester()
      .batch_size(multiple * 8)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2277
// Every batch size strictly below 8 (1 through 7).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  size_t batch = 1;
  while (batch < 8) {
    VAddMicrokernelTester()
      .batch_size(batch)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    batch++;
  }
}
2286
// Batch sizes strictly between 8 and 16 (9 through 15).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t excess = 1; excess < 8; excess++) {
    VAddMicrokernelTester()
      .batch_size(8 + excess)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2295
// Runs with inplace_a(true) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
2305
// Runs with inplace_b(true) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
2315
// Runs with both inplace_a(true) and inplace_b(true) for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
2326
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2338
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2350
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      zero_point += 51;
    }
  }
}
2362
// Sweeps a_scale (start 0.1, multiplied by 3.14 each step while <= 10)
// for batch sizes 1, 8, ..., 36.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
      scale *= 3.14f;
    }
  }
}
2374
// For batch sizes 1, 8, ..., 36: b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2386
// For batch sizes 1, 8, ..., 36: y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2398
// Batch sizes 1, 8, ..., 36 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmin(128);
    tester.Test(ukernel, init_params);
  }
}
2408
// Batch sizes 1, 8, ..., 36 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmax(128);
    tester.Test(ukernel, init_params);
  }
}
2418 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2419
2420
2421 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 16.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(16);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16,
              xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2428
// Batch sizes 32, 48, ..., 144 (multiples of 16, starting at 32).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 32; n < 160; n += 16) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2437
// Every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n < 16; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2446
// Every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 17; n < 32; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2455
// Batch sizes 1, 16, ..., 76 with the tester's inplace_a flag enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true);
    tester.Test(ukernel, init_params);
  }
}
2465
// Batch sizes 1, 16, ..., 76 with the tester's inplace_b flag enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2475
// Batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2486
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2498
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2510
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2522
// For batch sizes 1, 16, ..., 76: a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2534
// For batch sizes 1, 16, ..., 76: b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2546
// For batch sizes 1, 16, ..., 76: y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2558
// Batch sizes 1, 16, ..., 76 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmin(128);
    tester.Test(ukernel, init_params);
  }
}
2568
// Batch sizes 1, 16, ..., 76 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmax(128);
    tester.Test(ukernel, init_params);
  }
}
2578 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2579
2580
2581 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 24.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(24);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24,
              xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2588
// Batch sizes 48, 72, ..., 216 (multiples of 24, starting at 48).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 48; n < 240; n += 24) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2597
// Every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n < 24; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2606
// Every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 25; n < 48; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2615
// Batch sizes 1, 24, ..., 116 with the tester's inplace_a flag enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true);
    tester.Test(ukernel, init_params);
  }
}
2625
// Batch sizes 1, 24, ..., 116 with the tester's inplace_b flag enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2635
// Batch sizes 1, 24, ..., 116 with both inplace_a and inplace_b enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2646
// Crosses batch sizes 1, 24, ..., 116 with a_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2658
// Crosses batch sizes 1, 24, ..., 116 with b_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2670
// Crosses batch sizes 1, 24, ..., 116 with y_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2682
// For batch sizes 1, 24, ..., 116: a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2694
// For batch sizes 1, 24, ..., 116: b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2706
// For batch sizes 1, 24, ..., 116: y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2718
// Batch sizes 1, 24, ..., 116 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmin(128);
    tester.Test(ukernel, init_params);
  }
}
2728
// Batch sizes 1, 24, ..., 116 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmax(128);
    tester.Test(ukernel, init_params);
  }
}
2738 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2739
2740
2741 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 32.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(32);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32,
              xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2748
// Batch sizes 64, 96, ..., 288 (multiples of 32, starting at 64).
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 64; n < 320; n += 32) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2757
// Every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n < 32; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2766
// Every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 33; n < 64; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2775
// Batch sizes 1, 32, ..., 156 with the tester's inplace_a flag enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true);
    tester.Test(ukernel, init_params);
  }
}
2785
// Batch sizes 1, 32, ..., 156 with the tester's inplace_b flag enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2795
// Batch sizes 1, 32, ..., 156 with both inplace_a and inplace_b enabled.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2806
// Crosses batch sizes 1, 32, ..., 156 with a_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2818
// Crosses batch sizes 1, 32, ..., 156 with b_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2830
// Crosses batch sizes 1, 32, ..., 156 with y_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2842
// For batch sizes 1, 32, ..., 156: a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2854
// For batch sizes 1, 32, ..., 156: b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2866
// For batch sizes 1, 32, ..., 156: y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
2878
// Batch sizes 1, 32, ..., 156 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmin(128);
    tester.Test(ukernel, init_params);
  }
}
2888
// Batch sizes 1, 32, ..., 156 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__AVX_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__avx_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmax(128);
    tester.Test(ukernel, init_params);
  }
}
2898 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2899
2900
2901 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 8.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(8);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8,
              xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2908
// Batch sizes 16, 24, ..., 72 (multiples of 8, starting at 16).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 16; n < 80; n += 8) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2917
// Every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n < 8; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2926
// Every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 9; n < 16; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
2935
// Batch sizes 1, 8, ..., 36 with the tester's inplace_a flag enabled.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true);
    tester.Test(ukernel, init_params);
  }
}
2945
// Batch sizes 1, 8, ..., 36 with the tester's inplace_b flag enabled.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2955
// Batch sizes 1, 8, ..., 36 with both inplace_a and inplace_b enabled.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
2966
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2978
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
2990
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
3002
// For batch sizes 1, 8, ..., 36: a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
3014
// For batch sizes 1, 8, ..., 36: b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
3026
// For batch sizes 1, 8, ..., 36: y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).y_scale(scale);
      tester.Test(ukernel, init_params);
    }
  }
}
3038
// Batch sizes 1, 8, ..., 36 with the tester's qmin set to 128.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmin(128);
    tester.Test(ukernel, init_params);
  }
}
3048
// Batch sizes 1, 8, ..., 36 with the tester's qmax set to 128.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).qmax(128);
    tester.Test(ukernel, init_params);
  }
}
3058 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3059
3060
3061 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size fixed at 16.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VAddMicrokernelTester();
  tester.batch_size(16);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16,
              xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3068
// Batch sizes 32, 48, ..., 144 (multiples of 16, starting at 32).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 32; n < 160; n += 16) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
3077
// Every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n < 16; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
3086
// Every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 17; n < 32; ++n) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, init_params);
  }
}
3095
// Batch sizes 1, 16, ..., 76 with the tester's inplace_a flag enabled.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true);
    tester.Test(ukernel, init_params);
  }
}
3105
// Batch sizes 1, 16, ..., 76 with the tester's inplace_b flag enabled.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
3115
// Batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b enabled.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VAddMicrokernelTester();
    tester.batch_size(n).inplace_a(true).inplace_b(true);
    tester.Test(ukernel, init_params);
  }
}
3126
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).a_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
3138
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values -128, -77, ..., 127.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VAddMicrokernelTester();
      tester.batch_size(n).b_zero_point(zero_point);
      tester.Test(ukernel, init_params);
    }
  }
}
3150
// Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 80 in steps of 15 (x16 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3162
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 80 in steps of 15 (x16 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3174
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 80 in steps of 15 (x16 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3186
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 80 in steps of 15 (x16 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3198
// Runs the x16 SSE4.1 kernel with qmin(128) for batch sizes 1 through 80 in steps of 15.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3208
// Runs the x16 SSE4.1 kernel with qmax(128) for batch sizes 1 through 80 in steps of 15.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3218 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3219
3220
3221 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x24 SSE4.1 kernel with batch_size set to 24.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester().batch_size(24).Test(
      xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3228
// Runs the x24 SSE4.1 kernel for batch sizes 48, 72, ... below 240 (multiples of 24).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3237
// Runs the x24 SSE4.1 kernel for every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3246
// Runs the x24 SSE4.1 kernel for every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3255
// Runs the x24 SSE4.1 kernel with inplace_a(true) for batch sizes 1 through 120 in steps of 23.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3265
// Runs the x24 SSE4.1 kernel with inplace_b(true) for batch sizes 1 through 120 in steps of 23.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3275
// Runs the x24 SSE4.1 kernel with both inplace_a(true) and inplace_b(true)
// for batch sizes 1 through 120 in steps of 23.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3286
// Sweeps a_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 120 in steps of 23 (x24 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3298
// Sweeps b_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 120 in steps of 23 (x24 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3310
// Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 120 in steps of 23 (x24 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3322
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 120 in steps of 23 (x24 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3334
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 120 in steps of 23 (x24 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3346
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 120 in steps of 23 (x24 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3358
// Runs the x24 SSE4.1 kernel with qmin(128) for batch sizes 1 through 120 in steps of 23.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3368
// Runs the x24 SSE4.1 kernel with qmax(128) for batch sizes 1 through 120 in steps of 23.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3378 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3379
3380
3381 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x32 SSE4.1 kernel with batch_size set to 32.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE41;
  VAddMicrokernelTester().batch_size(32).Test(
      xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3388
// Runs the x32 SSE4.1 kernel for batch sizes 64, 96, ... below 320 (multiples of 32).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3397
// Runs the x32 SSE4.1 kernel for every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3406
// Runs the x32 SSE4.1 kernel for every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3415
// Runs the x32 SSE4.1 kernel with inplace_a(true) for batch sizes 1 through 160 in steps of 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3425
// Runs the x32 SSE4.1 kernel with inplace_b(true) for batch sizes 1 through 160 in steps of 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3435
// Runs the x32 SSE4.1 kernel with both inplace_a(true) and inplace_b(true)
// for batch sizes 1 through 160 in steps of 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3446
// Sweeps a_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 160 in steps of 31 (x32 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3458
// Sweeps b_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 160 in steps of 31 (x32 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3470
// Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 160 in steps of 31 (x32 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3482
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 160 in steps of 31 (x32 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3494
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 160 in steps of 31 (x32 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3506
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 160 in steps of 31 (x32 SSE4.1 kernel).
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3518
// Runs the x32 SSE4.1 kernel with qmin(128) for batch sizes 1 through 160 in steps of 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3528
// Runs the x32 SSE4.1 kernel with qmax(128) for batch sizes 1 through 160 in steps of 31.
TEST(QS8_VADD_MINMAX__SSE41_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3538 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3539
3540
3541 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x8 AVX kernel with batch_size set to 8.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3548
// Runs the x8 AVX kernel for batch sizes 16, 24, ... below 80 (multiples of 8).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3557
// Runs the x8 AVX kernel for every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3566
// Runs the x8 AVX kernel for every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3575
// Runs the x8 AVX kernel with inplace_a(true) for batch sizes 1 through 40 in steps of 7.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3585
// Runs the x8 AVX kernel with inplace_b(true) for batch sizes 1 through 40 in steps of 7.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3595
// Runs the x8 AVX kernel with both inplace_a(true) and inplace_b(true)
// for batch sizes 1 through 40 in steps of 7.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3606
// Sweeps a_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 40 in steps of 7 (x8 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3618
// Sweeps b_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 40 in steps of 7 (x8 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3630
// Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 40 in steps of 7 (x8 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3642
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 40 in steps of 7 (x8 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3654
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 40 in steps of 7 (x8 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3666
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 40 in steps of 7 (x8 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3678
// Runs the x8 AVX kernel with qmin(128) for batch sizes 1 through 40 in steps of 7.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3688
// Runs the x8 AVX kernel with qmax(128) for batch sizes 1 through 40 in steps of 7.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3698 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3699
3700
3701 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x16 AVX kernel with batch_size set to 16.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3708
// Runs the x16 AVX kernel for batch sizes 32, 48, ... below 160 (multiples of 16).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3717
// Runs the x16 AVX kernel for every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3726
// Runs the x16 AVX kernel for every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3735
// Runs the x16 AVX kernel with inplace_a(true) for batch sizes 1 through 80 in steps of 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3745
// Runs the x16 AVX kernel with inplace_b(true) for batch sizes 1 through 80 in steps of 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3755
// Runs the x16 AVX kernel with both inplace_a(true) and inplace_b(true)
// for batch sizes 1 through 80 in steps of 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3766
// Sweeps a_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 80 in steps of 15 (x16 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3778
// Sweeps b_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 80 in steps of 15 (x16 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3790
// Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size
// from 1 through 80 in steps of 15 (x16 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3802
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 80 in steps of 15 (x16 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3814
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 80 in steps of 15 (x16 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3826
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for each batch size from 1 through 80 in steps of 15 (x16 AVX kernel).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3838
// Runs the x16 AVX kernel with qmin(128) for batch sizes 1 through 80 in steps of 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3848
// Runs the x16 AVX kernel with qmax(128) for batch sizes 1 through 80 in steps of 15.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3858 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3859
3860
3861 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the x24 AVX kernel with batch_size set to 24.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  VAddMicrokernelTester().batch_size(24).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3868
// Runs the x24 AVX kernel for batch sizes 48, 72, ... below 240 (multiples of 24).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3877
// Runs the x24 AVX kernel for every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3886
// Runs the x24 AVX kernel for every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3895
// Runs the x24 AVX kernel with inplace_a(true) for batch sizes 1 through 120 in steps of 23.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3905
// Tests batch sizes 1, 24, 47, 70, 93, 116 with inplace_b(true) set on the tester.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
3915
// Tests batch sizes 1, 24, 47, 70, 93, 116 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
3926
// For batch sizes 1, 24, ..., 116, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 23;
  }
}
3938
// For batch sizes 1, 24, ..., 116, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 23;
  }
}
3950
// For batch sizes 1, 24, ..., 116, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 23;
  }
}
3962
// For batch sizes 1, 24, ..., 116, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 23;
  }
}
3974
// For batch sizes 1, 24, ..., 116, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 23;
  }
}
3986
// For batch sizes 1, 24, ..., 116, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 23;
  }
}
3998
// Tests batch sizes 1, 24, 47, 70, 93, 116 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
4008
// Tests batch sizes 1, 24, 47, 70, 93, 116 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
4018 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4019
4020
4021 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Invokes the tester once with batch_size = 32.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t n = 32;
  VAddMicrokernelTester().batch_size(n).Test(
    xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
    xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4028
// Tests batch sizes 64, 96, ..., 288 (step 32, stopping before 320).
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 64;
  while (n < 320) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 32;
  }
}
4037
// Tests every batch size from 1 through 31.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n < 32) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4046
// Tests every batch size from 33 through 63.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 33;
  while (n < 64) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4055
// Tests batch sizes 1, 32, 63, 94, 125, 156 with inplace_a(true) set on the tester.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
4065
// Tests batch sizes 1, 32, 63, 94, 125, 156 with inplace_b(true) set on the tester.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
4075
// Tests batch sizes 1, 32, 63, 94, 125, 156 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
4086
// For batch sizes 1, 32, ..., 156, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 31;
  }
}
4098
// For batch sizes 1, 32, ..., 156, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 31;
  }
}
4110
// For batch sizes 1, 32, ..., 156, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 31;
  }
}
4122
// For batch sizes 1, 32, ..., 156, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 31;
  }
}
4134
// For batch sizes 1, 32, ..., 156, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 31;
  }
}
4146
// For batch sizes 1, 32, ..., 156, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 31;
  }
}
4158
// Tests batch sizes 1, 32, 63, 94, 125, 156 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
4168
// Tests batch sizes 1, 32, 63, 94, 125, 156 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__AVX_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
4178 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4179
4180
4181 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Invokes the tester once with batch_size = 8.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t n = 8;
  VAddMicrokernelTester().batch_size(n).Test(
    xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
    xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4188
// Tests batch sizes 16, 24, ..., 72 (step 8, stopping before 80).
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 16;
  while (n < 80) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 8;
  }
}
4197
// Tests every batch size from 1 through 7.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n < 8) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4206
// Tests every batch size from 9 through 15.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 9;
  while (n < 16) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4215
// Tests batch sizes 1, 8, 15, 22, 29, 36 with inplace_a(true) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
4225
// Tests batch sizes 1, 8, 15, 22, 29, 36 with inplace_b(true) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
4235
// Tests batch sizes 1, 8, 15, 22, 29, 36 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
4246
// For batch sizes 1, 8, ..., 36, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 7;
  }
}
4258
// For batch sizes 1, 8, ..., 36, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 7;
  }
}
4270
// For batch sizes 1, 8, ..., 36, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 7;
  }
}
4282
// For batch sizes 1, 8, ..., 36, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 7;
  }
}
4294
// For batch sizes 1, 8, ..., 36, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 7;
  }
}
4306
// For batch sizes 1, 8, ..., 36, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 7;
  }
}
4318
// Tests batch sizes 1, 8, 15, 22, 29, 36 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
4328
// Tests batch sizes 1, 8, 15, 22, 29, 36 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
4338 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4339
4340
4341 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Invokes the tester once with batch_size = 16.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t n = 16;
  VAddMicrokernelTester().batch_size(n).Test(
    xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
    xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4348
// Tests batch sizes 32, 48, ..., 144 (step 16, stopping before 160).
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 32;
  while (n < 160) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 16;
  }
}
4357
// Tests every batch size from 1 through 15.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n < 16) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4366
// Tests every batch size from 17 through 31.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 17;
  while (n < 32) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4375
// Tests batch sizes 1, 16, 31, 46, 61, 76 with inplace_a(true) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
4385
// Tests batch sizes 1, 16, 31, 46, 61, 76 with inplace_b(true) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
4395
// Tests batch sizes 1, 16, 31, 46, 61, 76 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
4406
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 15;
  }
}
4418
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 15;
  }
}
4430
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 15;
  }
}
4442
// For batch sizes 1, 16, ..., 76, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
4454
// For batch sizes 1, 16, ..., 76, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
4466
// For batch sizes 1, 16, ..., 76, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
4478
// Tests batch sizes 1, 16, 31, 46, 61, 76 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
4488
// Tests batch sizes 1, 16, 31, 46, 61, 76 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
4498 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4499
4500
4501 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Invokes the tester once with batch_size = 24.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_XOP;
  const size_t n = 24;
  VAddMicrokernelTester().batch_size(n).Test(
    xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
    xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4508
// Tests batch sizes 48, 72, ..., 216 (step 24, stopping before 240).
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 48;
  while (n < 240) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 24;
  }
}
4517
// Tests every batch size from 1 through 23.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n < 24) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4526
// Tests every batch size from 25 through 47.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 25;
  while (n < 48) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    ++n;
  }
}
4535
// Tests batch sizes 1, 24, 47, 70, 93, 116 with inplace_a(true) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
4545
// Tests batch sizes 1, 24, 47, 70, 93, 116 with inplace_b(true) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
4555
// Tests batch sizes 1, 24, 47, 70, 93, 116 with both inplace_a(true) and inplace_b(true) set.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
4566
// For batch sizes 1, 24, ..., 116, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 23;
  }
}
4578
// For batch sizes 1, 24, ..., 116, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 23;
  }
}
4590
// For batch sizes 1, 24, ..., 116, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      zp += 51;
    }
    n += 23;
  }
}
4602
// For batch sizes 1, 24, ..., 116, sweeps a_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 23;
  }
}
4614
// For batch sizes 1, 24, ..., 116, sweeps b_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 23;
  }
}
4626
// For batch sizes 1, 24, ..., 116, sweeps y_scale from 0.1f, multiplying by 3.14f while <= 10.0f.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
    n += 23;
  }
}
4638
// Tests batch sizes 1, 24, 47, 70, 93, 116 with qmin(128) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
4648
// Tests batch sizes 1, 24, 47, 70, 93, 116 with qmax(128) set on the tester.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
4658 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4659
4660
4661 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Invokes the tester once with batch_size = 32.
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_XOP;
  const size_t n = 32;
  VAddMicrokernelTester().batch_size(n).Test(
    xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32,
    xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4668
// Tests batch sizes 64, 96, ..., 288 (step 32, stopping before 320).
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 64;
  while (n < 320) {
    VAddMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 32;
  }
}
4677
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_XOP;
  // Every batch size from 1 through 31.
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4686
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_XOP;
  // Every batch size from 33 through 63.
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4695
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 with inplace_a enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4705
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_b) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 with inplace_b enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4715
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4726
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 crossed with a_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4738
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 crossed with b_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4750
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 crossed with y_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4762
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 crossed with a_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4774
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 crossed with b_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4786
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 crossed with y_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4798
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 with qmin set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4808
TEST(QS8_VADD_MINMAX__XOP_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Batch sizes 1, 32, ..., 156 with qmax set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4818 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4819
4820
4821 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with a batch of exactly 8 elements.
  VAddMicrokernelTester tester{};
  tester.batch_size(8);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
}
4828
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 8 from 16 up to 72.
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4837
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4846
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4855
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 with inplace_a enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4865
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 with inplace_b enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4875
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4886
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 crossed with a_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4898
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 crossed with b_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4910
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 crossed with y_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4922
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 crossed with a_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4934
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 crossed with b_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4946
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 crossed with y_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4958
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 with qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4968
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 8, ..., 36 with qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4978 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4979
4980
4981 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with a batch of exactly 16 elements.
  VAddMicrokernelTester tester{};
  tester.batch_size(16);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
}
4988
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 16 from 32 up to 144.
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4997
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5006
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5015
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 with inplace_a enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5025
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 with inplace_b enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5035
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5046
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 crossed with a_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5058
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 crossed with b_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5070
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 crossed with y_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5082
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 crossed with a_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5094
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 crossed with b_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5106
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 crossed with y_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5118
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 with qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5128
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 16, ..., 76 with qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5138 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5139
5140
5141 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with a batch of exactly 24 elements.
  VAddMicrokernelTester tester{};
  tester.batch_size(24);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
}
5148
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 24 from 48 up to 216.
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5157
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 23.
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5166
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 25 through 47.
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5175
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 with inplace_a enabled.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5185
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 with inplace_b enabled.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5195
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5206
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 crossed with a_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5218
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 crossed with b_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5230
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 crossed with y_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5242
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 crossed with a_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5254
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 crossed with b_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5266
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 crossed with y_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5278
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 with qmin set to 128.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5288
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X24, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 24, ..., 116 with qmax set to 128.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5298 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5299
5300
5301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with a batch of exactly 32 elements.
  VAddMicrokernelTester tester{};
  tester.batch_size(32);
  tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
}
5308
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 32 from 64 up to 288.
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5317
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 31.
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5326
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 33 through 63.
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5335
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 with inplace_a enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5345
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 with inplace_b enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5355
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 with both inplace_a and inplace_b enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace_a(true);
    tester.inplace_b(true);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5366
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 crossed with a_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5378
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 crossed with b_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5390
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 crossed with y_zero_point values
  // -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_zero_point(zp);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5402
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 crossed with a_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5414
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 crossed with b_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5426
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 crossed with y_scale values starting at 0.1
  // and multiplied by 3.14 while they stay <= 10.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester tester{};
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
5438
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 with qmin set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5448
TEST(QS8_VADD_MINMAX__AVX2_MUL32_LD64_X32, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 1, 32, ..., 156 with qmax set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester tester{};
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
5458 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5459
5460
5461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ukernel once with batch_size set to 16.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qs8_add_minmax_avx512_params);
}
5468
// Runs the ukernel for batch_size = 32, 48, ..., 144 (step 16).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5477
// Runs the ukernel for every batch_size from 1 through 15.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5486
// Runs the ukernel for every batch_size from 17 through 31.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5495
// Runs the ukernel with inplace_a(true) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5505
// Runs the ukernel with inplace_b(true) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5515
// Runs the ukernel with both inplace_a(true) and inplace_b(true) for
// batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5526
// For batch_size = 1, 16, ..., 76 (step 15), sweeps a_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5538
// For batch_size = 1, 16, ..., 76 (step 15), sweeps b_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5550
// For batch_size = 1, 16, ..., 76 (step 15), sweeps y_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5562
// For batch_size = 1, 16, ..., 76 (step 15), sweeps a_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5574
// For batch_size = 1, 16, ..., 76 (step 15), sweeps b_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5586
// For batch_size = 1, 16, ..., 76 (step 15), sweeps y_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5598
// Runs the ukernel with qmin(128) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5608
// Runs the ukernel with qmax(128) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5618 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5619
5620
5621 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ukernel once with batch_size set to 32.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddMicrokernelTester().batch_size(32).Test(
      xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qs8_add_minmax_avx512_params);
}
5628
// Runs the ukernel for batch_size = 64, 96, ..., 288 (step 32).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5637
// Runs the ukernel for every batch_size from 1 through 31.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5646
// Runs the ukernel for every batch_size from 33 through 63.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5655
// Runs the ukernel with inplace_a(true) for batch_size = 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5665
// Runs the ukernel with inplace_b(true) for batch_size = 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5675
// Runs the ukernel with both inplace_a(true) and inplace_b(true) for
// batch_size = 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5686
// For batch_size = 1, 32, ..., 156 (step 31), sweeps a_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5698
// For batch_size = 1, 32, ..., 156 (step 31), sweeps b_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5710
// For batch_size = 1, 32, ..., 156 (step 31), sweeps y_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5722
// For batch_size = 1, 32, ..., 156 (step 31), sweeps a_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5734
// For batch_size = 1, 32, ..., 156 (step 31), sweeps b_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5746
// For batch_size = 1, 32, ..., 156 (step 31), sweeps y_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
          xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5758
// Runs the ukernel with qmin(128) for batch_size = 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5768
// Runs the ukernel with qmax(128) for batch_size = 1, 32, ..., 156 (step 31).
TEST(QS8_VADD_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
  }
}
5778 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5779
5780
5781 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the ukernel once with batch_size set to 8.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_eq_8) {
  VAddMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
      xnn_init_qs8_add_minmax_wasmsimd_params);
}
5787
// Runs the ukernel for batch_size = 16, 24, ..., 72 (step 8).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5795
// Runs the ukernel for every batch_size from 1 through 7.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5803
// Runs the ukernel for every batch_size from 9 through 15.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5811
// Runs the ukernel with inplace_a(true) for batch_size = 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5820
// Runs the ukernel with inplace_b(true) for batch_size = 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_b) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5829
// Runs the ukernel with both inplace_a(true) and inplace_b(true) for
// batch_size = 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, inplace_a_and_b) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5839
// For batch_size = 1, 8, ..., 36 (step 7), sweeps a_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5850
// For batch_size = 1, 8, ..., 36 (step 7), sweeps b_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5861
// For batch_size = 1, 8, ..., 36 (step 7), sweeps y_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5872
// For batch_size = 1, 8, ..., 36 (step 7), sweeps a_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, a_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5883
// For batch_size = 1, 8, ..., 36 (step 7), sweeps b_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, b_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5894
// For batch_size = 1, 8, ..., 36 (step 7), sweeps y_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, y_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5905
// Runs the ukernel with qmin(128) for batch_size = 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmin) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5914
// Runs the ukernel with qmax(128) for batch_size = 1, 8, ..., 36 (step 7).
TEST(QS8_VADD_MINMAX__WASMSIMD_X8, qmax) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5923 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5924
5925
5926 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the ukernel once with batch_size set to 16.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_eq_16) {
  VAddMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
      xnn_init_qs8_add_minmax_wasmsimd_params);
}
5932
// Runs the ukernel for batch_size = 32, 48, ..., 144 (step 16).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5940
// Runs the ukernel for every batch_size from 1 through 15.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5948
// Runs the ukernel for every batch_size from 17 through 31.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5956
// Runs the ukernel with inplace_a(true) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5965
// Runs the ukernel with inplace_b(true) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_b) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5974
// Runs the ukernel with both inplace_a(true) and inplace_b(true) for
// batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, inplace_a_and_b) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5984
// For batch_size = 1, 16, ..., 76 (step 15), sweeps a_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5995
// For batch_size = 1, 16, ..., 76 (step 15), sweeps b_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6006
// For batch_size = 1, 16, ..., 76 (step 15), sweeps y_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6017
// For batch_size = 1, 16, ..., 76 (step 15), sweeps a_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, a_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6028
// For batch_size = 1, 16, ..., 76 (step 15), sweeps b_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, b_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6039
// For batch_size = 1, 16, ..., 76 (step 15), sweeps y_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, y_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6050
// Runs the ukernel with qmin(128) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmin) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6059
// Runs the ukernel with qmax(128) for batch_size = 1, 16, ..., 76 (step 15).
TEST(QS8_VADD_MINMAX__WASMSIMD_X16, qmax) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6068 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6069
6070
6071 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the ukernel once with batch_size set to 24.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_eq_24) {
  VAddMicrokernelTester().batch_size(24).Test(
      xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
      xnn_init_qs8_add_minmax_wasmsimd_params);
}
6077
// Runs the ukernel for batch_size = 48, 72, ..., 216 (step 24).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_div_24) {
  for (size_t n = 48; n < 240; n += 24) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6085
// Runs the ukernel for every batch_size from 1 through 23.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_lt_24) {
  for (size_t n = 1; n < 24; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6093
// Runs the ukernel for every batch_size from 25 through 47.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, batch_gt_24) {
  for (size_t n = 25; n < 48; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6101
// Runs the ukernel with inplace_a(true) for batch_size = 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a) {
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6110
// Runs the ukernel with inplace_b(true) for batch_size = 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_b) {
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6119
// Runs the ukernel with both inplace_a(true) and inplace_b(true) for
// batch_size = 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, inplace_a_and_b) {
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6129
// For batch_size = 1, 24, ..., 116 (step 23), sweeps a_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_zero_point) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6140
// For batch_size = 1, 24, ..., 116 (step 23), sweeps b_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_zero_point) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6151
// For batch_size = 1, 24, ..., 116 (step 23), sweeps y_zero_point over
// -128, -77, ..., 127 (step 51).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_zero_point) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6162
// For batch_size = 1, 24, ..., 116 (step 23), sweeps a_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, a_scale) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6173
// For batch_size = 1, 24, ..., 116 (step 23), sweeps b_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, b_scale) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6184
// For batch_size = 1, 24, ..., 116 (step 23), sweeps y_scale upward from 0.1f,
// multiplying by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, y_scale) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6195
// Runs the ukernel with qmin(128) for batch_size = 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmin) {
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6204
// Runs the ukernel with qmax(128) for batch_size = 1, 24, ..., 116 (step 23).
TEST(QS8_VADD_MINMAX__WASMSIMD_X24, qmax) {
  for (size_t n = 1; n <= 120; n += 23) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6213 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6214
6215
6216 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_eq_32) {
  // Single run with batch_size fixed at 32.
  VAddMicrokernelTester().batch_size(32).Test(
      xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
      xnn_init_qs8_add_minmax_wasmsimd_params);
}
6222
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_div_32) {
  // Batch sizes 64, 96, ..., 288: multiples of 32 below 320.
  for (size_t n = 64; n < 320; n += 32) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6230
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_lt_32) {
  // Every batch size from 1 through 31.
  for (size_t n = 1; n < 32; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6238
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, batch_gt_32) {
  // Every batch size from 33 through 63.
  for (size_t n = 33; n < 64; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6246
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a) {
  // inplace_a enabled; batch sizes 1, 32, ..., 156 (stride 31).
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6255
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_b) {
  // inplace_b enabled; batch sizes 1, 32, ..., 156 (stride 31).
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6264
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, inplace_a_and_b) {
  // Both inplace_a and inplace_b enabled; batch sizes 1, 32, ..., 156.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
              xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6274
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_zero_point) {
  // For batch sizes 1, 32, ..., 156, a_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6285
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_zero_point) {
  // For batch sizes 1, 32, ..., 156, b_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6296
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_zero_point) {
  // For batch sizes 1, 32, ..., 156, y_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6307
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, a_scale) {
  // Batch sizes 1, 32, ..., 156 (stride 31); for each, a_scale starts at 0.1f
  // and is multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6318
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, b_scale) {
  // Batch sizes 1, 32, ..., 156 (stride 31); for each, b_scale starts at 0.1f
  // and is multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6329
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, y_scale) {
  // Batch sizes 1, 32, ..., 156 (stride 31); for each, y_scale starts at 0.1f
  // and is multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
          xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
6340
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmin) {
  // Runs the kernel with qmin set to 128 for batch sizes 1, 32, ..., 156.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6349
TEST(QS8_VADD_MINMAX__WASMSIMD_X32, qmax) {
  // Runs the kernel with qmax set to 128 for batch sizes 1, 32, ..., 156.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32,
        xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
6358 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6359
6360
TEST(QS8_VADD_MINMAX__SCALAR_X1, batch_eq_1) {
  // Single run with batch_size fixed at 1.
  VAddMicrokernelTester().batch_size(1).Test(
      xnn_qs8_vadd_minmax_ukernel__scalar_x1,
      xnn_init_qs8_add_minmax_scalar_params);
}
6366
TEST(QS8_VADD_MINMAX__SCALAR_X1, batch_gt_1) {
  // Every batch size from 2 through 9.
  for (size_t n = 2; n < 10; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x1,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6374
TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_a) {
  // inplace_a enabled; every batch size from 1 through 5.
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x1,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6383
TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_b) {
  // inplace_b enabled; every batch size from 1 through 5.
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x1,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6392
TEST(QS8_VADD_MINMAX__SCALAR_X1, inplace_a_and_b) {
  // Both inplace_a and inplace_b enabled; every batch size from 1 through 5.
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x1,
              xnn_init_qs8_add_minmax_scalar_params);
  }
}
6402
TEST(QS8_VADD_MINMAX__SCALAR_X1, a_zero_point) {
  // For batch sizes 1 through 5, a_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x1,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6413
TEST(QS8_VADD_MINMAX__SCALAR_X1, b_zero_point) {
  // For batch sizes 1 through 5, b_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x1,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6424
TEST(QS8_VADD_MINMAX__SCALAR_X1, y_zero_point) {
  // For batch sizes 1 through 5, y_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x1,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6435
TEST(QS8_VADD_MINMAX__SCALAR_X1, a_scale) {
  // Batch sizes 1 through 5; for each, a_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x1,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6446
TEST(QS8_VADD_MINMAX__SCALAR_X1, b_scale) {
  // Batch sizes 1 through 5; for each, b_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x1,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6457
TEST(QS8_VADD_MINMAX__SCALAR_X1, y_scale) {
  // Batch sizes 1 through 5; for each, y_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x1,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6468
TEST(QS8_VADD_MINMAX__SCALAR_X1, qmin) {
  // Runs the kernel with qmin set to 128 for batch sizes 1 through 5.
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x1,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6477
TEST(QS8_VADD_MINMAX__SCALAR_X1, qmax) {
  // Runs the kernel with qmax set to 128 for batch sizes 1 through 5.
  for (size_t n = 1; n <= 5; ++n) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x1,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6486
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_eq_2) {
  // Single run with batch_size fixed at 2.
  VAddMicrokernelTester().batch_size(2).Test(
      xnn_qs8_vadd_minmax_ukernel__scalar_x2,
      xnn_init_qs8_add_minmax_scalar_params);
}
6492
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_div_2) {
  // Batch sizes 4, 6, ..., 18: multiples of 2 below 20.
  for (size_t n = 4; n < 20; n += 2) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x2,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6500
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_lt_2) {
  // The loop bounds admit only one value: batch size 1.
  for (size_t n = 1; n < 2; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x2,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6508
TEST(QS8_VADD_MINMAX__SCALAR_X2, batch_gt_2) {
  // The loop bounds admit only one value: batch size 3.
  for (size_t n = 3; n < 4; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x2,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6516
TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_a) {
  // inplace_a enabled; every batch size from 1 through 10.
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x2,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6525
TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_b) {
  // inplace_b enabled; every batch size from 1 through 10.
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x2,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6534
TEST(QS8_VADD_MINMAX__SCALAR_X2, inplace_a_and_b) {
  // Both inplace_a and inplace_b enabled; every batch size from 1 through 10.
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x2,
              xnn_init_qs8_add_minmax_scalar_params);
  }
}
6544
TEST(QS8_VADD_MINMAX__SCALAR_X2, a_zero_point) {
  // For batch sizes 1 through 10, a_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x2,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6555
TEST(QS8_VADD_MINMAX__SCALAR_X2, b_zero_point) {
  // For batch sizes 1 through 10, b_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x2,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6566
TEST(QS8_VADD_MINMAX__SCALAR_X2, y_zero_point) {
  // For batch sizes 1 through 10, y_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x2,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6577
TEST(QS8_VADD_MINMAX__SCALAR_X2, a_scale) {
  // Batch sizes 1 through 10; for each, a_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x2,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6588
TEST(QS8_VADD_MINMAX__SCALAR_X2, b_scale) {
  // Batch sizes 1 through 10; for each, b_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x2,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6599
TEST(QS8_VADD_MINMAX__SCALAR_X2, y_scale) {
  // Batch sizes 1 through 10; for each, y_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x2,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6610
TEST(QS8_VADD_MINMAX__SCALAR_X2, qmin) {
  // Runs the kernel with qmin set to 128 for batch sizes 1 through 10.
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x2,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6619
TEST(QS8_VADD_MINMAX__SCALAR_X2, qmax) {
  // Runs the kernel with qmax set to 128 for batch sizes 1 through 10.
  for (size_t n = 1; n <= 10; ++n) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x2,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6628
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_eq_4) {
  // Single run with batch_size fixed at 4.
  VAddMicrokernelTester().batch_size(4).Test(
      xnn_qs8_vadd_minmax_ukernel__scalar_x4,
      xnn_init_qs8_add_minmax_scalar_params);
}
6634
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_div_4) {
  // Batch sizes 8, 12, ..., 36: multiples of 4 below 40.
  for (size_t n = 8; n < 40; n += 4) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x4,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6642
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_lt_4) {
  // Every batch size from 1 through 3.
  for (size_t n = 1; n < 4; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x4,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6650
TEST(QS8_VADD_MINMAX__SCALAR_X4, batch_gt_4) {
  // Every batch size from 5 through 7.
  for (size_t n = 5; n < 8; ++n) {
    VAddMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x4,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6658
TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_a) {
  // inplace_a enabled; batch sizes 1, 4, ..., 19 (stride 3).
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).inplace_a(true).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x4,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6667
TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_b) {
  // inplace_b enabled; batch sizes 1, 4, ..., 19 (stride 3).
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).inplace_b(true).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x4,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6676
TEST(QS8_VADD_MINMAX__SCALAR_X4, inplace_a_and_b) {
  // Both inplace_a and inplace_b enabled; batch sizes 1, 4, ..., 19.
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vadd_minmax_ukernel__scalar_x4,
              xnn_init_qs8_add_minmax_scalar_params);
  }
}
6686
TEST(QS8_VADD_MINMAX__SCALAR_X4, a_zero_point) {
  // For batch sizes 1, 4, ..., 19, a_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x4,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6697
TEST(QS8_VADD_MINMAX__SCALAR_X4, b_zero_point) {
  // For batch sizes 1, 4, ..., 19, b_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x4,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6708
TEST(QS8_VADD_MINMAX__SCALAR_X4, y_zero_point) {
  // For batch sizes 1, 4, ..., 19, y_zero_point takes the values
  // -128, -77, -26, 25, 76, 127 (stride 51).
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x4,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6719
TEST(QS8_VADD_MINMAX__SCALAR_X4, a_scale) {
  // Batch sizes 1, 4, ..., 19 (stride 3); for each, a_scale starts at 0.1f
  // and is multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x4,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6730
TEST(QS8_VADD_MINMAX__SCALAR_X4, b_scale) {
  // Batch sizes 1, 4, ..., 19 (stride 3); for each, b_scale starts at 0.1f
  // and is multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x4,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6741
TEST(QS8_VADD_MINMAX__SCALAR_X4, y_scale) {
  // Batch sizes 1, 4, ..., 19 (stride 3); for each, y_scale starts at 0.1f
  // and is multiplied by 3.14f for as long as it stays <= 10.0f.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vadd_minmax_ukernel__scalar_x4,
          xnn_init_qs8_add_minmax_scalar_params);
    }
  }
}
6752
TEST(QS8_VADD_MINMAX__SCALAR_X4, qmin) {
  // Runs the kernel with qmin set to 128 for batch sizes 1, 4, ..., 19.
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x4,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}
6761
TEST(QS8_VADD_MINMAX__SCALAR_X4, qmax) {
  // Runs the kernel with qmax set to 128 for batch sizes 1, 4, ..., 19.
  for (size_t n = 1; n <= 20; n += 3) {
    VAddMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vadd_minmax_ukernel__scalar_x4,
        xnn_init_qs8_add_minmax_scalar_params);
  }
}