1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/qs8-vmulc-minmax-fp32.yaml
8 // Generator: tools/generate-vbinary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vmul.h>
18 #include "vmulc-microkernel-tester.h"
19
20
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with a batch of exactly 8 elements.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VMulCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
28
// Runs batches of 16, 24, ..., 72 elements (multiples of 8 below 80).
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
37
// Runs every batch size from 1 through 7.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
46
// Runs every batch size from 9 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
55
// Runs batch sizes 1, 8, ..., 36 with the tester's inplace option enabled.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
65
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
77
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
89
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
101
// Crosses batch sizes 1, 8, ..., 36 with a_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
113
// Crosses batch sizes 1, 8, ..., 36 with b_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
125
// Crosses batch sizes 1, 8, ..., 36 with y_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
137
// Runs batch sizes 1, 8, ..., 36 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
147
// Runs batch sizes 1, 8, ..., 36 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
157 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
158
159
160 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with a batch of exactly 16 elements.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
167
// Runs batches of 32, 48, ..., 144 elements (multiples of 16 below 160).
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
176
// Runs every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
185
// Runs every batch size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
194
// Runs batch sizes 1, 16, ..., 76 with the tester's inplace option enabled.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
204
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
216
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
228
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
240
// Crosses batch sizes 1, 16, ..., 76 with a_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
252
// Crosses batch sizes 1, 16, ..., 76 with b_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
264
// Crosses batch sizes 1, 16, ..., 76 with y_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
276
// Runs batch sizes 1, 16, ..., 76 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
286
// Runs batch sizes 1, 16, ..., 76 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
296 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
297
298
299 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with a batch of exactly 16 elements.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
306
// Runs batches of 32, 48, ..., 144 elements (multiples of 16 below 160).
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
315
// Runs every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
324
// Runs every batch size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
333
// Runs batch sizes 1, 16, ..., 76 with the tester's inplace option enabled.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
343
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
355
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
367
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
379
// Crosses batch sizes 1, 16, ..., 76 with a_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
391
// Crosses batch sizes 1, 16, ..., 76 with b_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
403
// Crosses batch sizes 1, 16, ..., 76 with y_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
415
// Runs batch sizes 1, 16, ..., 76 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
425
// Runs batch sizes 1, 16, ..., 76 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
435 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
436
437
438 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with a batch of exactly 8 elements.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
445
// Runs batches of 16, 24, ..., 72 elements (multiples of 8 below 80).
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 16; n < 80; n += 8) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
454
// Runs every batch size from 1 through 7.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n < 8; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
463
// Runs every batch size from 9 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 9; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
472
// Runs batch sizes 1, 8, ..., 36 with the tester's inplace option enabled.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
482
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
494
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
506
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
518
// Crosses batch sizes 1, 8, ..., 36 with a_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
530
// Crosses batch sizes 1, 8, ..., 36 with b_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
542
// Crosses batch sizes 1, 8, ..., 36 with y_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
554
// Runs batch sizes 1, 8, ..., 36 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
564
// Runs batch sizes 1, 8, ..., 36 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
574 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
575
576
577 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with a batch of exactly 16 elements.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
584
// Runs batches of 32, 48, ..., 144 elements (multiples of 16 below 160).
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
593
// Runs every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
602
// Runs every batch size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
611
// Runs batch sizes 1, 16, ..., 76 with the tester's inplace option enabled.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
621
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
633
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
645
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point values from -128 to 127
// in steps of 51.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
657
// Crosses batch sizes 1, 16, ..., 76 with a_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
669
// Crosses batch sizes 1, 16, ..., 76 with b_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
681
// Crosses batch sizes 1, 16, ..., 76 with y_scale values that start at 0.1 and
// are multiplied by 3.14 for as long as they do not exceed 10.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
693
// Runs batch sizes 1, 16, ..., 76 with the tester's qmin set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
703
// Runs batch sizes 1, 16, ..., 76 with the tester's qmax set to 128.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
713 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
714
715
716 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with a batch of exactly 16 elements.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
723
// Runs batches of 32, 48, ..., 144 elements (multiples of 16 below 160).
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
732
// Runs every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
741
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Sweep every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
750
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
760
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep a_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
772
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep b_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
784
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep y_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
796
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
808
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
820
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
832
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
842
TEST(QS8_VMULC_MINMAX_FP32__NEONV8_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
852 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
853
854
855 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single run with a batch of exactly 8 elements.
  VMulCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
862
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep batch sizes 16, 24, ..., 72 (step 8).
  for (size_t n = 16; n < 80; n += 8) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
871
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
880
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
889
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, ..., 36 (step 7) with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
899
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep a_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
911
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep b_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
923
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep y_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
935
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
947
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
959
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
971
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, ..., 36 (step 7) with the tester's qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
981
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, ..., 36 (step 7) with the tester's qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
991 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
992
993
994 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  // Single run with a batch of exactly 16 elements.
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
1001
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep batch sizes 32, 48, ..., 144 (step 16).
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1010
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1019
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1028
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1038
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep a_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1050
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep b_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1062
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep y_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1074
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1086
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1098
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
1110
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1120
TEST(QS8_VMULC_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
1130 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1131
1132
1133 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with a batch of exactly 8 elements.
  VMulCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1140
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Sweep batch sizes 16, 24, ..., 72 (step 8).
  for (size_t n = 16; n < 80; n += 8) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1149
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Sweep every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1158
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Sweep every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1167
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, ..., 36 (step 7) with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1177
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep a_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1189
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep b_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1201
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep y_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1213
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1225
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1237
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, ..., 36 (step 7), sweep y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1249
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, ..., 36 (step 7) with the tester's qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1259
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, ..., 36 (step 7) with the tester's qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1269 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1270
1271
1272 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with a batch of exactly 16 elements.
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1279
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  // Sweep batch sizes 32, 48, ..., 144 (step 16).
  for (size_t n = 32; n < 160; n += 16) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1288
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Sweep every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1297
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Sweep every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1306
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1316
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep a_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1328
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep b_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1340
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep y_zero_point over -128, -77, ..., 127 (step 51).
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1352
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1364
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1376
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 16, ..., 76 (step 15), sweep y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1388
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1398
TEST(QS8_VMULC_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1408 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1409
1410
1411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run with a batch of exactly 8 elements.
  VMulCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1418
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Sweep batch sizes 16, 24, ..., 72 (step 8).
  for (size_t n = 16; n < 80; n += 8) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1427
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Sweep every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1436
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Sweep every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VMulCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1445
// Enables the tester's inplace mode for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VMulCMicrokernelTester().batch_size(batch).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1455
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1467
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1479
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1491
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1503
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1515
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1527
// Sets the tester's qmin to 128 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VMulCMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1537
// Sets the tester's qmax to 128 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VMulCMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1547 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1548
1549
1550 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX mul16/ld64 x16 kernel through the tester with a batch size of exactly 16.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
1557
// Covers batch sizes 32, 48, ..., 144 (multiples of 16 in [32, 160)).
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 32; batch < 160; batch += 16) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1566
// Covers every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch < 16; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1575
// Covers every batch size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 17; batch < 32; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1584
// Enables the tester's inplace mode for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VMulCMicrokernelTester().batch_size(batch).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1594
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1606
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1618
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1630
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1642
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1654
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1666
// Sets the tester's qmin to 128 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VMulCMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1676
// Sets the tester's qmax to 128 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VMulCMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1686 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1687
1688
1689 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the WAsm SIMD mul32/ld64 x8 kernel through the tester with a batch size of exactly 8.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_eq_8) {
  VMulCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
1695
// Covers batch sizes 16, 24, ..., 72 (multiples of 8 in [16, 80)).
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_div_8) {
  for (size_t batch = 16; batch < 80; batch += 8) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1703
// Covers every batch size from 1 through 7.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_lt_8) {
  for (size_t batch = 1; batch < 8; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1711
// Covers every batch size from 9 through 15.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_gt_8) {
  for (size_t batch = 9; batch < 16; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1719
// Enables the tester's inplace mode for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VMulCMicrokernelTester().batch_size(batch).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1728
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_zero_point) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1739
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_zero_point) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1750
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_zero_point) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1761
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_scale) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1772
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_scale) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1783
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_scale) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1794
// Sets the tester's qmin to 128 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmin) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VMulCMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1803
// Sets the tester's qmax to 128 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmax) {
  for (size_t batch = 1; batch <= 40; batch += 7) {
    VMulCMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1812 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1813
1814
1815 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the WAsm SIMD mul32/ld64 x16 kernel through the tester with a batch size of exactly 16.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_eq_16) {
  VMulCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
1821
// Covers batch sizes 32, 48, ..., 144 (multiples of 16 in [32, 160)).
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_div_16) {
  for (size_t batch = 32; batch < 160; batch += 16) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1829
// Covers every batch size from 1 through 15.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_lt_16) {
  for (size_t batch = 1; batch < 16; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1837
// Covers every batch size from 17 through 31.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_gt_16) {
  for (size_t batch = 17; batch < 32; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1845
// Enables the tester's inplace mode for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VMulCMicrokernelTester().batch_size(batch).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1854
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_zero_point) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1865
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_zero_point) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1876
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_zero_point) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1887
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_scale) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1898
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_scale) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1909
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_scale) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1920
// Sets the tester's qmin to 128 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmin) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VMulCMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1929
// Sets the tester's qmax to 128 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMULC_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmax) {
  for (size_t batch = 1; batch <= 80; batch += 15) {
    VMulCMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1938 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1939
1940
// Runs the scalar x1 kernel through the tester with a batch size of exactly 1.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, batch_eq_1) {
  VMulCMicrokernelTester().batch_size(1).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
}
1946
// Covers every batch size from 2 through 9.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, batch_gt_1) {
  for (size_t batch = 2; batch < 10; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
1954
// Enables the tester's inplace mode for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, inplace) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
1963
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, a_zero_point) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1974
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, b_zero_point) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1985
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, y_zero_point) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1996
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, a_scale) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2007
// Sweeps b_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, b_scale) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2018
// Sweeps y_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, y_scale) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2029
// Sets the tester's qmin to 128 for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, qmin) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2038
// Sets the tester's qmax to 128 for batch sizes 1 through 5.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X1, qmax) {
  for (size_t batch = 1; batch <= 5; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2047
// Runs the scalar x2 kernel through the tester with a batch size of exactly 2.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_eq_2) {
  VMulCMicrokernelTester().batch_size(2).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
}
2053
// Covers batch sizes 4, 6, ..., 18 (multiples of 2 in [4, 20)).
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_div_2) {
  for (size_t batch = 4; batch < 20; batch += 2) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2061
// The loop bounds admit a single batch size below 2, namely 1.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_lt_2) {
  for (size_t batch = 1; batch < 2; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2069
// The loop bounds admit a single batch size in (2, 4), namely 3.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, batch_gt_2) {
  for (size_t batch = 3; batch < 4; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2077
// Enables the tester's inplace mode for batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, inplace) {
  for (size_t batch = 1; batch <= 10; ++batch) {
    VMulCMicrokernelTester().batch_size(batch).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2086
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, a_zero_point) {
  for (size_t batch = 1; batch <= 10; ++batch) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).a_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2097
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, b_zero_point) {
  for (size_t batch = 1; batch <= 10; ++batch) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).b_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2108
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, y_zero_point) {
  for (size_t batch = 1; batch <= 10; ++batch) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulCMicrokernelTester().batch_size(batch).y_zero_point(zero_point).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2119
// Sweeps a_scale geometrically (start 0.1f, factor 3.14f, while <= 10.0f)
// for batch sizes 1 through 10.
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, a_scale) {
  for (size_t batch = 1; batch <= 10; ++batch) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester().batch_size(batch).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2130
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, b_scale) {
  // Run the scalar x2 kernel for every batch size in [1, 10], with b_scale
  // starting at 0.1 and multiplied by 3.14 each step while it stays <= 10.
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2141
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, y_scale) {
  // Run the scalar x2 kernel for every batch size in [1, 10], with y_scale
  // starting at 0.1 and multiplied by 3.14 each step while it stays <= 10.
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2152
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, qmin) {
  // Run the scalar x2 kernel with the tester's qmin set to 128 for every
  // batch size in [1, 10].
  for (size_t n = 1; n <= 10; ++n) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2161
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X2, qmax) {
  // Run the scalar x2 kernel with the tester's qmax set to 128 for every
  // batch size in [1, 10].
  for (size_t n = 1; n <= 10; ++n) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2170
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_eq_4) {
  // Single run of the scalar x4 kernel with a batch size of exactly 4.
  VMulCMicrokernelTester tester;
  tester.batch_size(4).Test(
      xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
}
2176
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_div_4) {
  // Run the scalar x4 kernel on batch sizes that are multiples of 4:
  // 8, 12, ..., 36 (i.e. 2x through 9x the value 4).
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VMulCMicrokernelTester tester;
    tester.batch_size(multiple * 4).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2184
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_lt_4) {
  // Run the scalar x4 kernel on batch sizes below 4: 1, 2 and 3.
  for (size_t n = 1; n <= 3; ++n) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2192
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, batch_gt_4) {
  // Run the scalar x4 kernel on batch sizes strictly between 4 and 8:
  // 5, 6 and 7.
  for (size_t n = 5; n <= 7; ++n) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2200
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, inplace) {
  // Run the scalar x4 kernel with the tester's inplace flag enabled for
  // batch sizes 1, 4, 7, ..., 19 (step of 3, up to 20).
  for (size_t n = 1; n <= 20; n += 3) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).inplace(true).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2209
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, a_zero_point) {
  // Run the scalar x4 kernel for batch sizes 1, 4, ..., 19 (step of 3),
  // combined with a_zero_point values -128, -77, -26, 25, 76 and 127.
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2220
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, b_zero_point) {
  // Run the scalar x4 kernel for batch sizes 1, 4, ..., 19 (step of 3),
  // combined with b_zero_point values -128, -77, -26, 25, 76 and 127.
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2231
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, y_zero_point) {
  // Run the scalar x4 kernel for batch sizes 1, 4, ..., 19 (step of 3),
  // combined with y_zero_point values -128, -77, -26, 25, 76 and 127.
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2242
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, a_scale) {
  // Run the scalar x4 kernel for batch sizes 1, 4, ..., 19 (step of 3), with
  // a_scale starting at 0.1 and multiplied by 3.14 each step while <= 10.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2253
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, b_scale) {
  // Run the scalar x4 kernel for batch sizes 1, 4, ..., 19 (step of 3), with
  // b_scale starting at 0.1 and multiplied by 3.14 each step while <= 10.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2264
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, y_scale) {
  // Run the scalar x4 kernel for batch sizes 1, 4, ..., 19 (step of 3), with
  // y_scale starting at 0.1 and multiplied by 3.14 each step while <= 10.
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
          xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2275
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, qmin) {
  // Run the scalar x4 kernel with the tester's qmin set to 128 for batch
  // sizes 1, 4, 7, ..., 19 (step of 3, up to 20).
  for (size_t n = 1; n <= 20; n += 3) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2284
TEST(QS8_VMULC_MINMAX_FP32__SCALAR_X4, qmax) {
  // Run the scalar x4 kernel with the tester's qmax set to 128 for batch
  // sizes 1, 4, 7, ..., 19 (step of 3, up to 20).
  for (size_t n = 1; n <= 20; n += 3) {
    VMulCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
        xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}