1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/x8-lut.yaml
8 // Generator: tools/generate-lut-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/lut.h>
17 #include "lut-microkernel-tester.h"
18
19
// Exact batch of 1 element.
TEST(X8_LUT__SCALAR_X1, batch_eq_1) {
  LUTMicrokernelTester().batch_size(1).Test(xnn_x8_lut_ukernel__scalar_x1);
}

// Batch sizes from 2 through 9.
TEST(X8_LUT__SCALAR_X1, batch_gt_1) {
  for (size_t bs = 2; bs < 10; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x1);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__SCALAR_X1, inplace) {
  for (size_t bs = 1; bs <= 5; ++bs) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__scalar_x1);
  }
}
42
// Exact batch of 2 elements.
TEST(X8_LUT__SCALAR_X2, batch_eq_2) {
  LUTMicrokernelTester().batch_size(2).Test(xnn_x8_lut_ukernel__scalar_x2);
}

// Batch sizes that are multiples of 2 (4 through 18).
TEST(X8_LUT__SCALAR_X2, batch_div_2) {
  for (size_t bs = 4; bs < 20; bs += 2) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}

// Batch sizes below 2.
TEST(X8_LUT__SCALAR_X2, batch_lt_2) {
  for (size_t bs = 1; bs < 2; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}

// Batch sizes between 2 and 4.
TEST(X8_LUT__SCALAR_X2, batch_gt_2) {
  for (size_t bs = 3; bs < 4; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__SCALAR_X2, inplace) {
  for (size_t bs = 1; bs <= 10; ++bs) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}
81
// Exact batch of 4 elements.
TEST(X8_LUT__SCALAR_X4, batch_eq_4) {
  LUTMicrokernelTester().batch_size(4).Test(xnn_x8_lut_ukernel__scalar_x4);
}

// Batch sizes that are multiples of 4 (8 through 36).
TEST(X8_LUT__SCALAR_X4, batch_div_4) {
  for (size_t bs = 8; bs < 40; bs += 4) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}

// Batch sizes below 4.
TEST(X8_LUT__SCALAR_X4, batch_lt_4) {
  for (size_t bs = 1; bs < 4; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}

// Batch sizes between 4 and 8.
TEST(X8_LUT__SCALAR_X4, batch_gt_4) {
  for (size_t bs = 5; bs < 8; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__SCALAR_X4, inplace) {
  for (size_t bs = 1; bs <= 20; bs += 3) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}
120
// Exact batch of 8 elements.
TEST(X8_LUT__SCALAR_X8, batch_eq_8) {
  LUTMicrokernelTester().batch_size(8).Test(xnn_x8_lut_ukernel__scalar_x8);
}

// Batch sizes that are multiples of 8 (16 through 72).
TEST(X8_LUT__SCALAR_X8, batch_div_8) {
  for (size_t bs = 16; bs < 80; bs += 8) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}

// Batch sizes below 8.
TEST(X8_LUT__SCALAR_X8, batch_lt_8) {
  for (size_t bs = 1; bs < 8; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}

// Batch sizes between 8 and 16.
TEST(X8_LUT__SCALAR_X8, batch_gt_8) {
  for (size_t bs = 9; bs < 16; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__SCALAR_X8, inplace) {
  for (size_t bs = 1; bs <= 40; bs += 7) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}
159
// Exact batch of 16 elements.
TEST(X8_LUT__SCALAR_X16, batch_eq_16) {
  LUTMicrokernelTester().batch_size(16).Test(xnn_x8_lut_ukernel__scalar_x16);
}

// Batch sizes that are multiples of 16 (32 through 144).
TEST(X8_LUT__SCALAR_X16, batch_div_16) {
  for (size_t bs = 32; bs < 160; bs += 16) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}

// Batch sizes below 16.
TEST(X8_LUT__SCALAR_X16, batch_lt_16) {
  for (size_t bs = 1; bs < 16; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}

// Batch sizes between 16 and 32.
TEST(X8_LUT__SCALAR_X16, batch_gt_16) {
  for (size_t bs = 17; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__SCALAR_X16, inplace) {
  for (size_t bs = 1; bs <= 80; bs += 15) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}
198
#if XNN_ARCH_ARM64
// Exact batch of 16 elements.
TEST(X8_LUT__NEON_TBX128X4_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  LUTMicrokernelTester().batch_size(16).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
}

// Batch sizes that are multiples of 16 (32 through 144).
TEST(X8_LUT__NEON_TBX128X4_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 32; bs < 160; bs += 16) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
  }
}

// Batch sizes below 16.
TEST(X8_LUT__NEON_TBX128X4_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs < 16; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
  }
}

// Batch sizes between 16 and 32.
TEST(X8_LUT__NEON_TBX128X4_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 17; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__NEON_TBX128X4_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs <= 80; bs += 15) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
  }
}
#endif  // XNN_ARCH_ARM64
244
245
#if XNN_ARCH_ARM64
// Exact batch of 32 elements.
TEST(X8_LUT__NEON_TBX128X4_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  LUTMicrokernelTester().batch_size(32).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
}

// Batch sizes that are multiples of 32 (64 through 288).
TEST(X8_LUT__NEON_TBX128X4_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 64; bs < 320; bs += 32) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
  }
}

// Batch sizes below 32.
TEST(X8_LUT__NEON_TBX128X4_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
  }
}

// Batch sizes between 32 and 64.
TEST(X8_LUT__NEON_TBX128X4_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 33; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__NEON_TBX128X4_X32, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs <= 160; bs += 31) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
  }
}
#endif  // XNN_ARCH_ARM64
291
292
#if XNN_ARCH_ARM64
// Exact batch of 48 elements.
TEST(X8_LUT__NEON_TBX128X4_X48, batch_eq_48) {
  TEST_REQUIRES_ARM_NEON;
  LUTMicrokernelTester().batch_size(48).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
}

// Batch sizes that are multiples of 48 (96 through 432).
TEST(X8_LUT__NEON_TBX128X4_X48, batch_div_48) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 96; bs < 480; bs += 48) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
  }
}

// Batch sizes below 48.
TEST(X8_LUT__NEON_TBX128X4_X48, batch_lt_48) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs < 48; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
  }
}

// Batch sizes between 48 and 96.
TEST(X8_LUT__NEON_TBX128X4_X48, batch_gt_48) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 49; bs < 96; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__NEON_TBX128X4_X48, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs <= 240; bs += 47) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
  }
}
#endif  // XNN_ARCH_ARM64
338
339
#if XNN_ARCH_ARM64
// Exact batch of 64 elements.
TEST(X8_LUT__NEON_TBX128X4_X64, batch_eq_64) {
  TEST_REQUIRES_ARM_NEON;
  LUTMicrokernelTester().batch_size(64).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
}

// Batch sizes that are multiples of 64 (128 through 576).
TEST(X8_LUT__NEON_TBX128X4_X64, batch_div_64) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 128; bs < 640; bs += 64) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
  }
}

// Batch sizes below 64.
TEST(X8_LUT__NEON_TBX128X4_X64, batch_lt_64) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
  }
}

// Batch sizes between 64 and 128.
TEST(X8_LUT__NEON_TBX128X4_X64, batch_gt_64) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 65; bs < 128; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__NEON_TBX128X4_X64, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bs = 1; bs <= 320; bs += 63) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
  }
}
#endif  // XNN_ARCH_ARM64
385
386
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 16 elements.
TEST(X8_LUT__SSSE3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSSE3;
  LUTMicrokernelTester().batch_size(16).Test(xnn_x8_lut_ukernel__ssse3_x16);
}

// Batch sizes that are multiples of 16 (32 through 144).
TEST(X8_LUT__SSSE3_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 32; bs < 160; bs += 16) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__ssse3_x16);
  }
}

// Batch sizes below 16.
TEST(X8_LUT__SSSE3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 1; bs < 16; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__ssse3_x16);
  }
}

// Batch sizes between 16 and 32.
TEST(X8_LUT__SSSE3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 17; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__ssse3_x16);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__SSSE3_X16, inplace) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 1; bs <= 80; bs += 15) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__ssse3_x16);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
432
433
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 32 elements.
TEST(X8_LUT__SSSE3_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSSE3;
  LUTMicrokernelTester().batch_size(32).Test(xnn_x8_lut_ukernel__ssse3_x32);
}

// Batch sizes that are multiples of 32 (64 through 288).
TEST(X8_LUT__SSSE3_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 64; bs < 320; bs += 32) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__ssse3_x32);
  }
}

// Batch sizes below 32.
TEST(X8_LUT__SSSE3_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 1; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__ssse3_x32);
  }
}

// Batch sizes between 32 and 64.
TEST(X8_LUT__SSSE3_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 33; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__ssse3_x32);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__SSSE3_X32, inplace) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t bs = 1; bs <= 160; bs += 31) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__ssse3_x32);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
479
480
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 16 elements.
TEST(X8_LUT__AVX_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  LUTMicrokernelTester().batch_size(16).Test(xnn_x8_lut_ukernel__avx_x16);
}

// Batch sizes that are multiples of 16 (32 through 144).
TEST(X8_LUT__AVX_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 32; bs < 160; bs += 16) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x16);
  }
}

// Batch sizes below 16.
TEST(X8_LUT__AVX_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs < 16; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x16);
  }
}

// Batch sizes between 16 and 32.
TEST(X8_LUT__AVX_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 17; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x16);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs <= 80; bs += 15) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx_x16);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
526
527
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 32 elements.
TEST(X8_LUT__AVX_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  LUTMicrokernelTester().batch_size(32).Test(xnn_x8_lut_ukernel__avx_x32);
}

// Batch sizes that are multiples of 32 (64 through 288).
TEST(X8_LUT__AVX_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 64; bs < 320; bs += 32) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x32);
  }
}

// Batch sizes below 32.
TEST(X8_LUT__AVX_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x32);
  }
}

// Batch sizes between 32 and 64.
TEST(X8_LUT__AVX_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 33; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x32);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs <= 160; bs += 31) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx_x32);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
573
574
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 48 elements.
TEST(X8_LUT__AVX_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  LUTMicrokernelTester().batch_size(48).Test(xnn_x8_lut_ukernel__avx_x48);
}

// Batch sizes that are multiples of 48 (96 through 432).
TEST(X8_LUT__AVX_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 96; bs < 480; bs += 48) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x48);
  }
}

// Batch sizes below 48.
TEST(X8_LUT__AVX_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs < 48; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x48);
  }
}

// Batch sizes between 48 and 96.
TEST(X8_LUT__AVX_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 49; bs < 96; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x48);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs <= 240; bs += 47) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx_x48);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
620
621
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 64 elements.
TEST(X8_LUT__AVX_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX;
  LUTMicrokernelTester().batch_size(64).Test(xnn_x8_lut_ukernel__avx_x64);
}

// Batch sizes that are multiples of 64 (128 through 576).
TEST(X8_LUT__AVX_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 128; bs < 640; bs += 64) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x64);
  }
}

// Batch sizes below 64.
TEST(X8_LUT__AVX_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x64);
  }
}

// Batch sizes between 64 and 128.
TEST(X8_LUT__AVX_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 65; bs < 128; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx_x64);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX_X64, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t bs = 1; bs <= 320; bs += 63) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx_x64);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
667
668
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 32 elements.
TEST(X8_LUT__AVX2_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  LUTMicrokernelTester().batch_size(32).Test(xnn_x8_lut_ukernel__avx2_x32);
}

// Batch sizes that are multiples of 32 (64 through 288).
TEST(X8_LUT__AVX2_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 64; bs < 320; bs += 32) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x32);
  }
}

// Batch sizes below 32.
TEST(X8_LUT__AVX2_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs < 32; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x32);
  }
}

// Batch sizes between 32 and 64.
TEST(X8_LUT__AVX2_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 33; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x32);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX2_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs <= 160; bs += 31) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx2_x32);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
714
715
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 64 elements.
TEST(X8_LUT__AVX2_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  LUTMicrokernelTester().batch_size(64).Test(xnn_x8_lut_ukernel__avx2_x64);
}

// Batch sizes that are multiples of 64 (128 through 576).
TEST(X8_LUT__AVX2_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 128; bs < 640; bs += 64) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x64);
  }
}

// Batch sizes below 64.
TEST(X8_LUT__AVX2_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x64);
  }
}

// Batch sizes between 64 and 128.
TEST(X8_LUT__AVX2_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 65; bs < 128; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x64);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX2_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs <= 320; bs += 63) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx2_x64);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
761
762
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 96 elements.
TEST(X8_LUT__AVX2_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX2;
  LUTMicrokernelTester().batch_size(96).Test(xnn_x8_lut_ukernel__avx2_x96);
}

// Batch sizes that are multiples of 96 (192 through 864).
TEST(X8_LUT__AVX2_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 192; bs < 960; bs += 96) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x96);
  }
}

// Batch sizes below 96.
TEST(X8_LUT__AVX2_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs < 96; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x96);
  }
}

// Batch sizes between 96 and 192.
TEST(X8_LUT__AVX2_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 97; bs < 192; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x96);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX2_X96, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs <= 480; bs += 95) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx2_x96);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
808
809
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 128 elements.
TEST(X8_LUT__AVX2_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX2;
  LUTMicrokernelTester().batch_size(128).Test(xnn_x8_lut_ukernel__avx2_x128);
}

// Batch sizes that are multiples of 128 (256 through 1152).
TEST(X8_LUT__AVX2_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 256; bs < 1280; bs += 128) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x128);
  }
}

// Batch sizes below 128.
TEST(X8_LUT__AVX2_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs < 128; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x128);
  }
}

// Batch sizes between 128 and 256.
TEST(X8_LUT__AVX2_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 129; bs < 256; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx2_x128);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX2_X128, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t bs = 1; bs <= 640; bs += 127) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx2_x128);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
855
856
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 64 elements.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512SKX;
  LUTMicrokernelTester().batch_size(64).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
}

// Batch sizes that are multiples of 64 (128 through 576).
TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 128; bs < 640; bs += 64) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
  }
}

// Batch sizes below 64.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 1; bs < 64; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
  }
}

// Batch sizes between 64 and 128.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 65; bs < 128; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 1; bs <= 320; bs += 63) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
902
903
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 128 elements.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512SKX;
  LUTMicrokernelTester().batch_size(128).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
}

// Batch sizes that are multiples of 128 (256 through 1152).
TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 256; bs < 1280; bs += 128) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
  }
}

// Batch sizes below 128.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 1; bs < 128; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
  }
}

// Batch sizes between 128 and 256.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 129; bs < 256; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 1; bs <= 640; bs += 127) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
949
950
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exact batch of 192 elements.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_eq_192) {
  TEST_REQUIRES_X86_AVX512SKX;
  LUTMicrokernelTester().batch_size(192).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
}

// Batch sizes that are multiples of 192 (384 through 1728).
TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_div_192) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 384; bs < 1920; bs += 192) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
  }
}

// Batch sizes below 192.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_lt_192) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 1; bs < 192; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
  }
}

// Batch sizes between 192 and 384.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_gt_192) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 193; bs < 384; ++bs) {
    LUTMicrokernelTester().batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
  }
}

// In-place: output buffer aliases the input.
TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t bs = 1; bs <= 960; bs += 191) {
    LUTMicrokernelTester().batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
996
997
998 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X8_LUT__AVX512SKX_VPSHUFB_X256,batch_eq_256)999 TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_eq_256) {
1000 TEST_REQUIRES_X86_AVX512SKX;
1001 LUTMicrokernelTester()
1002 .batch_size(256)
1003 .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
1004 }
1005
TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_div_256) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Exact multiples of the 256-byte tile, from 2x up to (excluding) 10x.
  for (size_t bs = 512; bs < 2560; bs += 256) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
  }
}
1014
TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_lt_256) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every batch size smaller than one full tile (remainder-only path).
  for (size_t bs = 1; bs < 256; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
  }
}
1023
TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_gt_256) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Batch sizes between one and two tiles: one full pass plus a remainder.
  for (size_t bs = 257; bs < 512; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
  }
}
1032
TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  // In-place operation (output aliases input); stepping by tile-1 sweeps
  // both aligned and misaligned batch sizes up to 5 tiles.
  for (size_t bs = 1; bs <= 1280; bs += 255) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.inplace(true);
    tester.Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
  }
}
1042 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1043
1044
1045 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X8_LUT__WASMSIMD_X16, batch_eq_16) {
  // Single invocation at exactly one full 16-byte tile.
  LUTMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_x8_lut_ukernel__wasmsimd_x16);
}
1051
TEST(X8_LUT__WASMSIMD_X16, batch_div_16) {
  // Exact multiples of the 16-byte tile, from 2x up to (excluding) 10x.
  for (size_t bs = 32; bs < 160; bs += 16) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x16);
  }
}
1059
TEST(X8_LUT__WASMSIMD_X16, batch_lt_16) {
  // Every batch size smaller than one full tile (remainder-only path).
  for (size_t bs = 1; bs < 16; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x16);
  }
}
1067
TEST(X8_LUT__WASMSIMD_X16, batch_gt_16) {
  // Batch sizes between one and two tiles: one full pass plus a remainder.
  for (size_t bs = 17; bs < 32; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x16);
  }
}
1075
TEST(X8_LUT__WASMSIMD_X16, inplace) {
  // In-place operation (output aliases input); stepping by tile-1 sweeps
  // both aligned and misaligned batch sizes up to 5 tiles.
  for (size_t bs = 1; bs <= 80; bs += 15) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.inplace(true);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x16);
  }
}
1084 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1085
1086
1087 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X8_LUT__WASMSIMD_X32, batch_eq_32) {
  // Single invocation at exactly one full 32-byte tile.
  LUTMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_x8_lut_ukernel__wasmsimd_x32);
}
1093
TEST(X8_LUT__WASMSIMD_X32, batch_div_32) {
  // Exact multiples of the 32-byte tile, from 2x up to (excluding) 10x.
  for (size_t bs = 64; bs < 320; bs += 32) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x32);
  }
}
1101
TEST(X8_LUT__WASMSIMD_X32, batch_lt_32) {
  // Every batch size smaller than one full tile (remainder-only path).
  for (size_t bs = 1; bs < 32; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x32);
  }
}
1109
TEST(X8_LUT__WASMSIMD_X32, batch_gt_32) {
  // Batch sizes between one and two tiles: one full pass plus a remainder.
  for (size_t bs = 33; bs < 64; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x32);
  }
}
1117
TEST(X8_LUT__WASMSIMD_X32, inplace) {
  // In-place operation (output aliases input); stepping by tile-1 sweeps
  // both aligned and misaligned batch sizes up to 5 tiles.
  for (size_t bs = 1; bs <= 160; bs += 31) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.inplace(true);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x32);
  }
}
1126 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1127
1128
1129 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X8_LUT__WASMSIMD_X48, batch_eq_48) {
  // Single invocation at exactly one full 48-byte tile.
  LUTMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_x8_lut_ukernel__wasmsimd_x48);
}
1135
TEST(X8_LUT__WASMSIMD_X48, batch_div_48) {
  // Exact multiples of the 48-byte tile, from 2x up to (excluding) 10x.
  for (size_t bs = 96; bs < 480; bs += 48) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x48);
  }
}
1143
TEST(X8_LUT__WASMSIMD_X48, batch_lt_48) {
  // Every batch size smaller than one full tile (remainder-only path).
  for (size_t bs = 1; bs < 48; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x48);
  }
}
1151
TEST(X8_LUT__WASMSIMD_X48, batch_gt_48) {
  // Batch sizes between one and two tiles: one full pass plus a remainder.
  for (size_t bs = 49; bs < 96; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x48);
  }
}
1159
TEST(X8_LUT__WASMSIMD_X48, inplace) {
  // In-place operation (output aliases input); stepping by tile-1 sweeps
  // both aligned and misaligned batch sizes up to 5 tiles.
  for (size_t bs = 1; bs <= 240; bs += 47) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.inplace(true);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x48);
  }
}
1168 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1169
1170
1171 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X8_LUT__WASMSIMD_X64, batch_eq_64) {
  // Single invocation at exactly one full 64-byte tile.
  LUTMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_x8_lut_ukernel__wasmsimd_x64);
}
1177
TEST(X8_LUT__WASMSIMD_X64, batch_div_64) {
  // Exact multiples of the 64-byte tile, from 2x up to (excluding) 10x.
  for (size_t bs = 128; bs < 640; bs += 64) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x64);
  }
}
1185
TEST(X8_LUT__WASMSIMD_X64, batch_lt_64) {
  // Every batch size smaller than one full tile (remainder-only path).
  for (size_t bs = 1; bs < 64; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x64);
  }
}
1193
TEST(X8_LUT__WASMSIMD_X64, batch_gt_64) {
  // Batch sizes between one and two tiles: one full pass plus a remainder.
  for (size_t bs = 65; bs < 128; bs++) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x64);
  }
}
1201
TEST(X8_LUT__WASMSIMD_X64, inplace) {
  // In-place operation (output aliases input); stepping by tile-1 sweeps
  // both aligned and misaligned batch sizes up to 5 tiles.
  for (size_t bs = 1; bs <= 320; bs += 63) {
    LUTMicrokernelTester tester;
    tester.batch_size(bs);
    tester.inplace(true);
    tester.Test(xnn_x8_lut_ukernel__wasmsimd_x64);
  }
}
1210 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1211