xref: /aosp_15_r20/external/XNNPACK/test/qs8-vmulc-minmax-fp32.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qs8-vmulc-minmax-fp32.yaml
//   Generator: tools/generate-vbinary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vmul.h>
18 #include "vmulc-microkernel-tester.h"
19 
20 
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 8.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VMulCMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
28 
// Sweeps batch_size over the multiples of 8 from 16 up to, but excluding, 80.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
37 
// Sweeps every batch_size from 1 through 7.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
46 
// Sweeps every batch_size from 9 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
55 
// Runs with inplace(true) for batch_size = 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
65 
// For batch_size = 1, 8, ..., 36 (step 7), varies a_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
77 
// For batch_size = 1, 8, ..., 36 (step 7), varies b_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
89 
// For batch_size = 1, 8, ..., 36 (step 7), varies y_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
101 
// For batch_size = 1, 8, ..., 36 (step 7), varies a_scale starting at 0.1f and
// multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
113 
// For batch_size = 1, 8, ..., 36 (step 7), varies b_scale starting at 0.1f and
// multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
125 
// For batch_size = 1, 8, ..., 36 (step 7), varies y_scale starting at 0.1f and
// multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
137 
// Runs with qmin(128) for batch_size = 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
147 
// Runs with qmax(128) for batch_size = 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
157 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
158 
159 
160 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 16.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulCMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
167 
// Sweeps batch_size over the multiples of 16 from 32 up to, but excluding, 160.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
176 
// Sweeps every batch_size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
185 
// Sweeps every batch_size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
194 
// Runs with inplace(true) for batch_size = 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
204 
// For batch_size = 1, 16, ..., 76 (step 15), varies a_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
216 
// For batch_size = 1, 16, ..., 76 (step 15), varies b_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
228 
// For batch_size = 1, 16, ..., 76 (step 15), varies y_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
240 
// For batch_size = 1, 16, ..., 76 (step 15), varies a_scale starting at 0.1f
// and multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
252 
// For batch_size = 1, 16, ..., 76 (step 15), varies b_scale starting at 0.1f
// and multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
264 
// For batch_size = 1, 16, ..., 76 (step 15), varies y_scale starting at 0.1f
// and multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
276 
// Runs with qmin(128) for batch_size = 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
286 
// Runs with qmax(128) for batch_size = 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
296 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
297 
298 
299 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 16.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulCMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
306 
// Sweeps batch_size over the multiples of 16 from 32 up to, but excluding, 160.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
315 
// Sweeps every batch_size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
324 
// Sweeps every batch_size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
333 
// Runs with inplace(true) for batch_size = 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
343 
// For batch_size = 1, 16, ..., 76 (step 15), varies a_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
355 
// For batch_size = 1, 16, ..., 76 (step 15), varies b_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
367 
// For batch_size = 1, 16, ..., 76 (step 15), varies y_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
379 
// For batch_size = 1, 16, ..., 76 (step 15), varies a_scale starting at 0.1f
// and multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
391 
// For batch_size = 1, 16, ..., 76 (step 15), varies b_scale starting at 0.1f
// and multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
403 
// For batch_size = 1, 16, ..., 76 (step 15), varies y_scale starting at 0.1f
// and multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
415 
// Runs with qmin(128) for batch_size = 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
425 
// Runs with qmax(128) for batch_size = 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
435 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
436 
437 
438 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 8.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulCMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
445 
// Sweeps batch_size over the multiples of 8 from 16 up to, but excluding, 80.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
454 
// Sweeps every batch_size from 1 through 7.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
463 
// Sweeps every batch_size from 9 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
472 
// Runs with inplace(true) for batch_size = 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
482 
// For batch_size = 1, 8, ..., 36 (step 7), varies a_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
494 
// For batch_size = 1, 8, ..., 36 (step 7), varies b_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
506 
// For batch_size = 1, 8, ..., 36 (step 7), varies y_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
518 
// For batch_size = 1, 8, ..., 36 (step 7), varies a_scale starting at 0.1f and
// multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
530 
// For batch_size = 1, 8, ..., 36 (step 7), varies b_scale starting at 0.1f and
// multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
542 
// For batch_size = 1, 8, ..., 36 (step 7), varies y_scale starting at 0.1f and
// multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
554 
// Runs with qmin(128) for batch_size = 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
564 
// Runs with qmax(128) for batch_size = 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
574 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
575 
576 
577 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with batch_size fixed at 16.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulCMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
584 
// Sweeps batch_size over the multiples of 16 from 32 up to, but excluding, 160.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
593 
// Sweeps every batch_size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
602 
// Sweeps every batch_size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
611 
// Runs with inplace(true) for batch_size = 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulCMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
621 
// For batch_size = 1, 16, ..., 76 (step 15), varies a_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
633 
// For batch_size = 1, 16, ..., 76 (step 15), varies b_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
645 
// For batch_size = 1, 16, ..., 76 (step 15), varies y_zero_point over
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
657 
// For batch_size = 1, 16, ..., 76 (step 15), varies a_scale starting at 0.1f
// and multiplying by 3.14f while it stays <= 10.0f.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulCMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
669 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, b_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
681 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, y_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
693 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, qmin) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
703 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, qmax) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
713 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
714 
715 
716 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_eq_16) {
  // Single run with batch_size fixed at 16; no other tester setting is overridden.
  TEST_REQUIRES_ARM_NEON_V8;
  VMulCMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
723 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_div_16) {
  // Batch sizes 32, 48, ..., 144, generated as multiple * 16 for multiple = 2..9.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t multiple = 2; multiple <= 9; ++multiple) {
    VMulCMicrokernelTester().batch_size(multiple * 16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
732 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_lt_16) {
  // Every batch size from 1 through 15.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 15; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
741 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_gt_16) {
  // Every batch size from 17 through 31.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 17; n <= 31; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
750 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, inplace) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's inplace flag set to true.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
760 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, a_zero_point) {
  // Crosses batch sizes 1, 16, 31, 46, 61, 76 with a_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
772 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, b_zero_point) {
  // Crosses batch sizes 1, 16, 31, 46, 61, 76 with b_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
784 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, y_zero_point) {
  // Crosses batch sizes 1, 16, 31, 46, 61, 76 with y_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
796 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, a_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
808 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, b_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
820 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, y_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
832 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, qmin) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
842 
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, qmax) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
852 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
853 
854 
855 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_eq_8) {
  // Single run with batch_size fixed at 8; no other tester setting is overridden.
  TEST_REQUIRES_X86_SSE2;
  VMulCMicrokernelTester().batch_size(8).Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
862 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_div_8) {
  // Batch sizes 16, 24, ..., 72, generated as multiple * 8 for multiple = 2..9.
  TEST_REQUIRES_X86_SSE2;
  for (size_t multiple = 2; multiple <= 9; ++multiple) {
    VMulCMicrokernelTester().batch_size(multiple * 8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
871 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_lt_8) {
  // Every batch size from 1 through 7.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 7; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
880 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_gt_8) {
  // Every batch size from 9 through 15.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n <= 15; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
889 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace) {
  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's inplace flag set to true.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
899 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_zero_point) {
  // Crosses batch sizes 1, 8, 15, 22, 29, 36 with a_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
911 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_zero_point) {
  // Crosses batch sizes 1, 8, 15, 22, 29, 36 with b_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
923 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_zero_point) {
  // Crosses batch sizes 1, 8, 15, 22, 29, 36 with y_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
935 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_scale) {
  // For batch sizes 1, 8, 15, 22, 29, 36: a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
947 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_scale) {
  // For batch sizes 1, 8, 15, 22, 29, 36: b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
959 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_scale) {
  // For batch sizes 1, 8, 15, 22, 29, 36: y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
971 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmin) {
  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
981 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmax) {
  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
991 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
992 
993 
994 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_eq_16) {
  // Single run with batch_size fixed at 16; no other tester setting is overridden.
  TEST_REQUIRES_X86_SSE2;
  VMulCMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
1001 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_div_16) {
  // Batch sizes 32, 48, ..., 144, generated as multiple * 16 for multiple = 2..9.
  TEST_REQUIRES_X86_SSE2;
  for (size_t multiple = 2; multiple <= 9; ++multiple) {
    VMulCMicrokernelTester().batch_size(multiple * 16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1010 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_lt_16) {
  // Every batch size from 1 through 15.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 15; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1019 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_gt_16) {
  // Every batch size from 17 through 31.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n <= 31; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1028 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's inplace flag set to true.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1038 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_zero_point) {
  // Crosses batch sizes 1, 16, 31, 46, 61, 76 with a_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1050 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_zero_point) {
  // Crosses batch sizes 1, 16, 31, 46, 61, 76 with b_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1062 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_zero_point) {
  // Crosses batch sizes 1, 16, 31, 46, 61, 76 with y_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1074 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1086 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1098 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_scale) {
  // For batch sizes 1, 16, 31, 46, 61, 76: y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1110 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmin) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1120 
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmax) {
  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1130 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1131 
1132 
1133 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_eq_8) {
  // Single run with batch_size fixed at 8; no other tester setting is overridden.
  TEST_REQUIRES_X86_SSE41;
  VMulCMicrokernelTester().batch_size(8).Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1140 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_div_8) {
  // Batch sizes 16, 24, ..., 72, generated as multiple * 8 for multiple = 2..9.
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple <= 9; ++multiple) {
    VMulCMicrokernelTester().batch_size(multiple * 8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1149 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_lt_8) {
  // Every batch size from 1 through 7.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 7; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1158 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_gt_8) {
  // Every batch size from 9 through 15.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 9; n <= 15; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1167 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace) {
  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's inplace flag set to true.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1177 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_zero_point) {
  // Crosses batch sizes 1, 8, 15, 22, 29, 36 with a_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1189 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_zero_point) {
  // Crosses batch sizes 1, 8, 15, 22, 29, 36 with b_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1201 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_zero_point) {
  // Crosses batch sizes 1, 8, 15, 22, 29, 36 with y_zero_point values -128, -77, -26, 25, 76, 127.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_zero_point(zero_point);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1213 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_scale) {
  // For batch sizes 1, 8, 15, 22, 29, 36: a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1225 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_scale) {
  // For batch sizes 1, 8, 15, 22, 29, 36: b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1237 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_scale) {
  // For batch sizes 1, 8, 15, 22, 29, 36: y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulCMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1249 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmin) {
  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1259 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmax) {
  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VMulCMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1269 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1270 
1271 
1272 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_eq_16) {
  // Single run with batch_size fixed at 16; no other tester setting is overridden.
  TEST_REQUIRES_X86_SSE41;
  VMulCMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1279 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_div_16) {
  // Batch sizes 32, 48, ..., 144, generated as multiple * 16 for multiple = 2..9.
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple <= 9; ++multiple) {
    VMulCMicrokernelTester().batch_size(multiple * 16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1288 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_lt_16) {
  // Every batch size from 1 through 15.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 15; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1297 
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_gt_16) {
  // Every batch size from 17 through 31.
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n <= 31; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1306 
// Batch sizes 1, 16, 31, 46, 61, 76 with inplace(true) set on the tester.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1316 
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1328 
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1340 
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1352 
// Crosses batch sizes 1, 16, ..., 76 with a_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1364 
// Crosses batch sizes 1, 16, ..., 76 with b_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1376 
// Crosses batch sizes 1, 16, ..., 76 with y_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1388 
// Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1398 
// Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1408 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1409 
1410 
1411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX x8 kernel once with a batch of exactly 8 elements.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VMulCMicrokernelTester().batch_size(8)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1418 
// Covers batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1427 
// Covers every batch size from 1 through 7.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1436 
// Covers every batch size from 9 through 15.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1445 
// Batch sizes 1, 8, 15, 22, 29, 36 with inplace(true) set on the tester.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1455 
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1467 
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1479 
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1491 
// Crosses batch sizes 1, 8, ..., 36 with a_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1503 
// Crosses batch sizes 1, 8, ..., 36 with b_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1515 
// Crosses batch sizes 1, 8, ..., 36 with y_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1527 
// Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1537 
// Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1547 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1548 
1549 
1550 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX x16 kernel once with a batch of exactly 16 elements.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VMulCMicrokernelTester().batch_size(16)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1557 
// Covers batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1566 
// Covers every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1575 
// Covers every batch size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1584 
// Batch sizes 1, 16, 31, 46, 61, 76 with inplace(true) set on the tester.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1594 
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1606 
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1618 
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1630 
// Crosses batch sizes 1, 16, ..., 76 with a_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1642 
// Crosses batch sizes 1, 16, ..., 76 with b_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1654 
// Crosses batch sizes 1, 16, ..., 76 with y_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1666 
// Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1676 
// Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1686 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1687 
1688 
1689 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the WAsm SIMD x8 kernel once with a batch of exactly 8 elements.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_eq_8) {
  VMulCMicrokernelTester().batch_size(8)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
1695 
// Covers batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1703 
// Covers every batch size from 1 through 7.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1711 
// Covers every batch size from 9 through 15.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1719 
// Batch sizes 1, 8, 15, 22, 29, 36 with inplace(true) set on the tester.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1728 
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1739 
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1750 
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1761 
// Crosses batch sizes 1, 8, ..., 36 with a_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1772 
// Crosses batch sizes 1, 8, ..., 36 with b_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1783 
// Crosses batch sizes 1, 8, ..., 36 with y_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1794 
// Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmin) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmin(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1803 
// Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmax) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmax(128)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1812 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1813 
1814 
1815 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the WAsm SIMD x16 kernel once with a batch of exactly 16 elements.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_eq_16) {
  VMulCMicrokernelTester().batch_size(16)
    .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
1821 
// Covers batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1829 
// Covers every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1837 
// Covers every batch size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1845 
// Batch sizes 1, 16, 31, 46, 61, 76 with inplace(true) set on the tester.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true)
      .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1854 
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1865 
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1876 
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point settings
// -128, -77, -26, 25, 76, 127.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1887 
// Crosses batch sizes 1, 16, ..., 76 with a_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1898 
// Crosses batch sizes 1, 16, ..., 76 with b_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1909 
// Crosses batch sizes 1, 16, ..., 76 with y_scale values that start at 0.1
// and are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
1920 
// Runs the wasmsimd_mul32_ld64_x16 kernel with qmin set to 128 on the tester,
// for batch sizes 1, 16, ..., 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmin) {
  size_t n = 1;
  while (n <= 80) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmin(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
    n += 15;
  }
}
1929 
// Runs the wasmsimd_mul32_ld64_x16 kernel with qmax set to 128 on the tester,
// for batch sizes 1, 16, ..., 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmax) {
  size_t n = 1;
  while (n <= 80) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmax(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
    n += 15;
  }
}
1938 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1939 
1940 
// Runs the scalar_x1 kernel once with a batch of exactly one element.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, batch_eq_1) {
  VMulCMicrokernelTester tester;
  tester.batch_size(1);
  tester.Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
    xnn_init_qs8_mul_minmax_fp32_scalar_params,
    xnn_qs8_requantize_fp32);
}
1946 
// Runs the scalar_x1 kernel for every batch size from 2 through 9.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, batch_gt_1) {
  size_t n = 2;
  while (n < 10) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
1954 
// Runs the scalar_x1 kernel with the tester's inplace flag enabled, batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, inplace) {
  size_t n = 1;
  while (n <= 5) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
1963 
// a_zero_point sweep for the scalar_x1 kernel: batch sizes 1 through 5 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, a_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1974 
// b_zero_point sweep for the scalar_x1 kernel: batch sizes 1 through 5 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, b_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1985 
// y_zero_point sweep for the scalar_x1 kernel: batch sizes 1 through 5 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, y_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1996 
// a_scale sweep for the scalar_x1 kernel: batch sizes 1 through 5 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, a_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2007 
// b_scale sweep for the scalar_x1 kernel: batch sizes 1 through 5 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, b_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2018 
// y_scale sweep for the scalar_x1 kernel: batch sizes 1 through 5 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, y_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2029 
// Runs the scalar_x1 kernel with qmin set to 128 on the tester, batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, qmin) {
  size_t n = 1;
  while (n <= 5) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmin(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2038 
// Runs the scalar_x1 kernel with qmax set to 128 on the tester, batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, qmax) {
  size_t n = 1;
  while (n <= 5) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmax(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2047 
// Runs the scalar_x2 kernel once with a batch of exactly two elements.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_eq_2) {
  VMulCMicrokernelTester tester;
  tester.batch_size(2);
  tester.Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
    xnn_init_qs8_mul_minmax_fp32_scalar_params,
    xnn_qs8_requantize_fp32);
}
2053 
// Runs the scalar_x2 kernel on even batch sizes 4, 6, ..., 18.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_div_2) {
  size_t n = 4;
  while (n < 20) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    n += 2;
  }
}
2061 
// Runs the scalar_x2 kernel on batch sizes below 2; the loop bounds admit only size 1.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_lt_2) {
  size_t n = 1;
  while (n < 2) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2069 
// Runs the scalar_x2 kernel on batch sizes in [3, 4); the loop bounds admit only size 3.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_gt_2) {
  size_t n = 3;
  while (n < 4) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2077 
// Runs the scalar_x2 kernel with the tester's inplace flag enabled, batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, inplace) {
  size_t n = 1;
  while (n <= 10) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2086 
// a_zero_point sweep for the scalar_x2 kernel: batch sizes 1 through 10 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, a_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2097 
// b_zero_point sweep for the scalar_x2 kernel: batch sizes 1 through 10 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, b_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2108 
// y_zero_point sweep for the scalar_x2 kernel: batch sizes 1 through 10 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, y_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2119 
// a_scale sweep for the scalar_x2 kernel: batch sizes 1 through 10 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, a_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2130 
// b_scale sweep for the scalar_x2 kernel: batch sizes 1 through 10 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, b_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2141 
// y_scale sweep for the scalar_x2 kernel: batch sizes 1 through 10 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, y_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2152 
// Runs the scalar_x2 kernel with qmin set to 128 on the tester, batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, qmin) {
  size_t n = 1;
  while (n <= 10) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmin(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2161 
// Runs the scalar_x2 kernel with qmax set to 128 on the tester, batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, qmax) {
  size_t n = 1;
  while (n <= 10) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmax(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2170 
// Runs the scalar_x4 kernel once with a batch of exactly four elements.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_eq_4) {
  VMulCMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(
    xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
    xnn_init_qs8_mul_minmax_fp32_scalar_params,
    xnn_qs8_requantize_fp32);
}
2176 
// Runs the scalar_x4 kernel on multiples of four: batch sizes 8, 12, ..., 36.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_div_4) {
  size_t n = 8;
  while (n < 40) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    n += 4;
  }
}
2184 
// Runs the scalar_x4 kernel on batch sizes 1, 2 and 3.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_lt_4) {
  size_t n = 1;
  while (n < 4) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2192 
// Runs the scalar_x4 kernel on batch sizes 5, 6 and 7.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_gt_4) {
  size_t n = 5;
  while (n < 8) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    ++n;
  }
}
2200 
// Runs the scalar_x4 kernel with the tester's inplace flag enabled, batch sizes 1, 4, ..., 19.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, inplace) {
  size_t n = 1;
  while (n <= 20) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    n += 3;
  }
}
2209 
// a_zero_point sweep for the scalar_x4 kernel: batch sizes 1, 4, ..., 19 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, a_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2220 
// b_zero_point sweep for the scalar_x4 kernel: batch sizes 1, 4, ..., 19 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, b_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2231 
// y_zero_point sweep for the scalar_x4 kernel: batch sizes 1, 4, ..., 19 crossed with
// zero points -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, y_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2242 
// a_scale sweep for the scalar_x4 kernel: batch sizes 1, 4, ..., 19 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, a_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2253 
// b_scale sweep for the scalar_x4 kernel: batch sizes 1, 4, ..., 19 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, b_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2264 
// y_scale sweep for the scalar_x4 kernel: batch sizes 1, 4, ..., 19 crossed with a
// geometric series of scales (0.1f, multiplied by 3.14f while <= 10.0f).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, y_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale);
      tester.Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
      scale *= 3.14f;
    }
  }
}
2275 
// Runs the scalar_x4 kernel with qmin set to 128 on the tester, batch sizes 1, 4, ..., 19.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, qmin) {
  size_t n = 1;
  while (n <= 20) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmin(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    n += 3;
  }
}
2284 
// Runs the scalar_x4 kernel with qmax set to 128 on the tester, batch sizes 1, 4, ..., 19.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, qmax) {
  size_t n = 1;
  while (n <= 20) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmax(128);
    tester.Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
    n += 3;
  }
}