// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/x8-lut.yaml
//   Generator: tools/generate-lut-test.py


#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/lut.h>
#include "lut-microkernel-tester.h"

// Tests for the scalar x8 LUT microkernel with a 1-element tile:
// xnn_x8_lut_ukernel__scalar_x1.

// Batch of exactly one element (one full tile).
TEST(X8_LUT__SCALAR_X1, batch_eq_1) {
  LUTMicrokernelTester()
    .batch_size(1)
    .Test(xnn_x8_lut_ukernel__scalar_x1);
}

// Batches larger than one element.
TEST(X8_LUT__SCALAR_X1, batch_gt_1) {
  for (size_t batch_size = 2; batch_size < 10; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x1);
  }
}

// In-place operation: the output buffer aliases the input buffer.
TEST(X8_LUT__SCALAR_X1, inplace) {
  for (size_t batch_size = 1; batch_size <= 5; batch_size += 1) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_x8_lut_ukernel__scalar_x1);
  }
}

// Tests for the scalar x8 LUT microkernel with a 2-element tile:
// xnn_x8_lut_ukernel__scalar_x2.

// Batch of exactly one full tile.
TEST(X8_LUT__SCALAR_X2, batch_eq_2) {
  LUTMicrokernelTester()
    .batch_size(2)
    .Test(xnn_x8_lut_ukernel__scalar_x2);
}

// Batch a multiple of the tile size (no remainder).
TEST(X8_LUT__SCALAR_X2, batch_div_2) {
  for (size_t batch_size = 4; batch_size < 20; batch_size += 2) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}

// Batch smaller than one tile (remainder path only).
TEST(X8_LUT__SCALAR_X2, batch_lt_2) {
  for (size_t batch_size = 1; batch_size < 2; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}

// One full tile plus a partial remainder tile.
TEST(X8_LUT__SCALAR_X2, batch_gt_2) {
  for (size_t batch_size = 3; batch_size < 4; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}

// In-place operation: the output buffer aliases the input buffer.
TEST(X8_LUT__SCALAR_X2, inplace) {
  for (size_t batch_size = 1; batch_size <= 10; batch_size += 1) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_x8_lut_ukernel__scalar_x2);
  }
}

// Tests for the scalar x8 LUT microkernel with a 4-element tile:
// xnn_x8_lut_ukernel__scalar_x4.

// Batch of exactly one full tile.
TEST(X8_LUT__SCALAR_X4, batch_eq_4) {
  LUTMicrokernelTester()
    .batch_size(4)
    .Test(xnn_x8_lut_ukernel__scalar_x4);
}

// Batch a multiple of the tile size (no remainder).
TEST(X8_LUT__SCALAR_X4, batch_div_4) {
  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}

// Batch smaller than one tile (remainder path only).
TEST(X8_LUT__SCALAR_X4, batch_lt_4) {
  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}

// One full tile plus a partial remainder tile.
TEST(X8_LUT__SCALAR_X4, batch_gt_4) {
  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}

// In-place operation: the output buffer aliases the input buffer.
TEST(X8_LUT__SCALAR_X4, inplace) {
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_x8_lut_ukernel__scalar_x4);
  }
}

// Tests for the scalar x8 LUT microkernel with an 8-element tile:
// xnn_x8_lut_ukernel__scalar_x8.

// Batch of exactly one full tile.
TEST(X8_LUT__SCALAR_X8, batch_eq_8) {
  LUTMicrokernelTester()
    .batch_size(8)
    .Test(xnn_x8_lut_ukernel__scalar_x8);
}

// Batch a multiple of the tile size (no remainder).
TEST(X8_LUT__SCALAR_X8, batch_div_8) {
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}

// Batch smaller than one tile (remainder path only).
TEST(X8_LUT__SCALAR_X8, batch_lt_8) {
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}

// One full tile plus a partial remainder tile.
TEST(X8_LUT__SCALAR_X8, batch_gt_8) {
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}

// In-place operation: the output buffer aliases the input buffer.
TEST(X8_LUT__SCALAR_X8, inplace) {
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_x8_lut_ukernel__scalar_x8);
  }
}

// Tests for the scalar x8 LUT microkernel with a 16-element tile:
// xnn_x8_lut_ukernel__scalar_x16.

// Batch of exactly one full tile.
TEST(X8_LUT__SCALAR_X16, batch_eq_16) {
  LUTMicrokernelTester()
    .batch_size(16)
    .Test(xnn_x8_lut_ukernel__scalar_x16);
}

// Batch a multiple of the tile size (no remainder).
TEST(X8_LUT__SCALAR_X16, batch_div_16) {
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}

// Batch smaller than one tile (remainder path only).
TEST(X8_LUT__SCALAR_X16, batch_lt_16) {
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}

// One full tile plus a partial remainder tile.
TEST(X8_LUT__SCALAR_X16, batch_gt_16) {
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}

// In-place operation: the output buffer aliases the input buffer.
TEST(X8_LUT__SCALAR_X16, inplace) {
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    LUTMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_x8_lut_ukernel__scalar_x16);
  }
}

// Tests for the NEON TBX (4x128-bit table) x8 LUT microkernel with a
// 16-byte tile: xnn_x8_lut_ukernel__neon_tbx128x4_x16.  AArch64 only.
#if XNN_ARCH_ARM64
  // Batch of exactly one full tile.
  TEST(X8_LUT__NEON_TBX128X4_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    LUTMicrokernelTester()
      .batch_size(16)
      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__NEON_TBX128X4_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__NEON_TBX128X4_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__NEON_TBX128X4_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__NEON_TBX128X4_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);
    }
  }
#endif  // XNN_ARCH_ARM64


// Tests for the NEON TBX (4x128-bit table) x8 LUT microkernel with a
// 32-byte tile: xnn_x8_lut_ukernel__neon_tbx128x4_x32.  AArch64 only.
#if XNN_ARCH_ARM64
  // Batch of exactly one full tile.
  TEST(X8_LUT__NEON_TBX128X4_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON;
    LUTMicrokernelTester()
      .batch_size(32)
      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__NEON_TBX128X4_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__NEON_TBX128X4_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__NEON_TBX128X4_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__NEON_TBX128X4_X32, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);
    }
  }
#endif  // XNN_ARCH_ARM64


// Tests for the NEON TBX (4x128-bit table) x8 LUT microkernel with a
// 48-byte tile: xnn_x8_lut_ukernel__neon_tbx128x4_x48.  AArch64 only.
#if XNN_ARCH_ARM64
  // Batch of exactly one full tile.
  TEST(X8_LUT__NEON_TBX128X4_X48, batch_eq_48) {
    TEST_REQUIRES_ARM_NEON;
    LUTMicrokernelTester()
      .batch_size(48)
      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__NEON_TBX128X4_X48, batch_div_48) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__NEON_TBX128X4_X48, batch_lt_48) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 48; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__NEON_TBX128X4_X48, batch_gt_48) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 49; batch_size < 96; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__NEON_TBX128X4_X48, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);
    }
  }
#endif  // XNN_ARCH_ARM64


// Tests for the NEON TBX (4x128-bit table) x8 LUT microkernel with a
// 64-byte tile: xnn_x8_lut_ukernel__neon_tbx128x4_x64.  AArch64 only.
#if XNN_ARCH_ARM64
  // Batch of exactly one full tile.
  TEST(X8_LUT__NEON_TBX128X4_X64, batch_eq_64) {
    TEST_REQUIRES_ARM_NEON;
    LUTMicrokernelTester()
      .batch_size(64)
      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__NEON_TBX128X4_X64, batch_div_64) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__NEON_TBX128X4_X64, batch_lt_64) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__NEON_TBX128X4_X64, batch_gt_64) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 65; batch_size < 128; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__NEON_TBX128X4_X64, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);
    }
  }
#endif  // XNN_ARCH_ARM64


// Tests for the SSSE3 x8 LUT microkernel with a 16-byte tile:
// xnn_x8_lut_ukernel__ssse3_x16.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__SSSE3_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSSE3;
    LUTMicrokernelTester()
      .batch_size(16)
      .Test(xnn_x8_lut_ukernel__ssse3_x16);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__SSSE3_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__ssse3_x16);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__SSSE3_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__ssse3_x16);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__SSSE3_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__ssse3_x16);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__SSSE3_X16, inplace) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__ssse3_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the SSSE3 x8 LUT microkernel with a 32-byte tile:
// xnn_x8_lut_ukernel__ssse3_x32.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__SSSE3_X32, batch_eq_32) {
    TEST_REQUIRES_X86_SSSE3;
    LUTMicrokernelTester()
      .batch_size(32)
      .Test(xnn_x8_lut_ukernel__ssse3_x32);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__SSSE3_X32, batch_div_32) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__ssse3_x32);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__SSSE3_X32, batch_lt_32) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__ssse3_x32);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__SSSE3_X32, batch_gt_32) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__ssse3_x32);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__SSSE3_X32, inplace) {
    TEST_REQUIRES_X86_SSSE3;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__ssse3_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX x8 LUT microkernel with a 16-byte tile:
// xnn_x8_lut_ukernel__avx_x16.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    LUTMicrokernelTester()
      .batch_size(16)
      .Test(xnn_x8_lut_ukernel__avx_x16);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x16);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x16);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x16);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX_X16, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx_x16);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX x8 LUT microkernel with a 32-byte tile:
// xnn_x8_lut_ukernel__avx_x32.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX;
    LUTMicrokernelTester()
      .batch_size(32)
      .Test(xnn_x8_lut_ukernel__avx_x32);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x32);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x32);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x32);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX_X32, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX x8 LUT microkernel with a 48-byte tile:
// xnn_x8_lut_ukernel__avx_x48.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX_X48, batch_eq_48) {
    TEST_REQUIRES_X86_AVX;
    LUTMicrokernelTester()
      .batch_size(48)
      .Test(xnn_x8_lut_ukernel__avx_x48);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX_X48, batch_div_48) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x48);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX_X48, batch_lt_48) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 48; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x48);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX_X48, batch_gt_48) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 49; batch_size < 96; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x48);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX_X48, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx_x48);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX x8 LUT microkernel with a 64-byte tile:
// xnn_x8_lut_ukernel__avx_x64.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX_X64, batch_eq_64) {
    TEST_REQUIRES_X86_AVX;
    LUTMicrokernelTester()
      .batch_size(64)
      .Test(xnn_x8_lut_ukernel__avx_x64);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX_X64, batch_div_64) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x64);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX_X64, batch_lt_64) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x64);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX_X64, batch_gt_64) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 65; batch_size < 128; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx_x64);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX_X64, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx_x64);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX2 x8 LUT microkernel with a 32-byte tile:
// xnn_x8_lut_ukernel__avx2_x32.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX2_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX2;
    LUTMicrokernelTester()
      .batch_size(32)
      .Test(xnn_x8_lut_ukernel__avx2_x32);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX2_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x32);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX2_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x32);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX2_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x32);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX2_X32, inplace) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx2_x32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX2 x8 LUT microkernel with a 64-byte tile:
// xnn_x8_lut_ukernel__avx2_x64.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX2_X64, batch_eq_64) {
    TEST_REQUIRES_X86_AVX2;
    LUTMicrokernelTester()
      .batch_size(64)
      .Test(xnn_x8_lut_ukernel__avx2_x64);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX2_X64, batch_div_64) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x64);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX2_X64, batch_lt_64) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x64);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX2_X64, batch_gt_64) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 65; batch_size < 128; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x64);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX2_X64, inplace) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx2_x64);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX2 x8 LUT microkernel with a 96-byte tile:
// xnn_x8_lut_ukernel__avx2_x96.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX2_X96, batch_eq_96) {
    TEST_REQUIRES_X86_AVX2;
    LUTMicrokernelTester()
      .batch_size(96)
      .Test(xnn_x8_lut_ukernel__avx2_x96);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX2_X96, batch_div_96) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 192; batch_size < 960; batch_size += 96) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x96);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX2_X96, batch_lt_96) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size < 96; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x96);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX2_X96, batch_gt_96) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 97; batch_size < 192; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x96);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX2_X96, inplace) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size <= 480; batch_size += 95) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx2_x96);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX2 x8 LUT microkernel with a 128-byte tile:
// xnn_x8_lut_ukernel__avx2_x128.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX2_X128, batch_eq_128) {
    TEST_REQUIRES_X86_AVX2;
    LUTMicrokernelTester()
      .batch_size(128)
      .Test(xnn_x8_lut_ukernel__avx2_x128);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX2_X128, batch_div_128) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 256; batch_size < 1280; batch_size += 128) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x128);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX2_X128, batch_lt_128) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size < 128; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x128);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX2_X128, batch_gt_128) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 129; batch_size < 256; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx2_x128);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX2_X128, inplace) {
    TEST_REQUIRES_X86_AVX2;
    for (size_t batch_size = 1; batch_size <= 640; batch_size += 127) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx2_x128);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX512 (Skylake-X) VPSHUFB x8 LUT microkernel with a
// 64-byte tile: xnn_x8_lut_ukernel__avx512skx_vpshufb_x64.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_eq_64) {
    TEST_REQUIRES_X86_AVX512SKX;
    LUTMicrokernelTester()
      .batch_size(64)
      .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_div_64) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_lt_64) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 1; batch_size < 64; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, batch_gt_64) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 65; batch_size < 128; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X64, inplace) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x64);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


// Tests for the AVX512 (Skylake-X) VPSHUFB x8 LUT microkernel with a
// 128-byte tile: xnn_x8_lut_ukernel__avx512skx_vpshufb_x128.  x86/x86-64 only.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch of exactly one full tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_eq_128) {
    TEST_REQUIRES_X86_AVX512SKX;
    LUTMicrokernelTester()
      .batch_size(128)
      .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
  }

  // Batch a multiple of the tile size (no remainder).
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_div_128) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 256; batch_size < 1280; batch_size += 128) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
    }
  }

  // Batch smaller than one tile (remainder path only).
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_lt_128) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 1; batch_size < 128; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
    }
  }

  // One full tile plus a partial remainder tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, batch_gt_128) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 129; batch_size < 256; batch_size++) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
    }
  }

  // In-place operation: the output buffer aliases the input buffer.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X128, inplace) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t batch_size = 1; batch_size <= 640; batch_size += 127) {
      LUTMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x128);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch size equal to exactly one 192-byte tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_eq_192) {
    TEST_REQUIRES_X86_AVX512SKX;
    LUTMicrokernelTester tester;
    tester.batch_size(192).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
  }

  // Batch sizes that are whole multiples of the tile: 384, 576, ..., 1728.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_div_192) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 2 * 192; bs < 10 * 192; bs += 192) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
    }
  }

  // Partial-tile batches strictly below one full tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_lt_192) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 1; bs != 192; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
    }
  }

  // One full tile plus a partial remainder (193..383 bytes).
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, batch_gt_192) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 192 + 1; bs < 2 * 192; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
    }
  }

  // Same-buffer input/output across assorted batch sizes up to five tiles.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X192, inplace) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 1; bs <= 5 * 192; bs += 192 - 1) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x192);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
996 
997 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Batch size equal to exactly one 256-byte tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_eq_256) {
    TEST_REQUIRES_X86_AVX512SKX;
    LUTMicrokernelTester tester;
    tester.batch_size(256).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
  }

  // Batch sizes that are whole multiples of the tile: 512, 768, ..., 2304.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_div_256) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 2 * 256; bs < 10 * 256; bs += 256) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
    }
  }

  // Partial-tile batches strictly below one full tile.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_lt_256) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 1; bs != 256; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
    }
  }

  // One full tile plus a partial remainder (257..511 bytes).
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, batch_gt_256) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 256 + 1; bs < 2 * 256; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
    }
  }

  // Same-buffer input/output across assorted batch sizes up to five tiles.
  TEST(X8_LUT__AVX512SKX_VPSHUFB_X256, inplace) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t bs = 1; bs <= 5 * 256; bs += 256 - 1) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__avx512skx_vpshufb_x256);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1043 
1044 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch size equal to exactly one 16-byte tile.
  TEST(X8_LUT__WASMSIMD_X16, batch_eq_16) {
    LUTMicrokernelTester tester;
    tester.batch_size(16).Test(xnn_x8_lut_ukernel__wasmsimd_x16);
  }

  // Batch sizes that are whole multiples of the tile: 32, 48, ..., 144.
  TEST(X8_LUT__WASMSIMD_X16, batch_div_16) {
    for (size_t bs = 2 * 16; bs < 10 * 16; bs += 16) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x16);
    }
  }

  // Partial-tile batches strictly below one full tile.
  TEST(X8_LUT__WASMSIMD_X16, batch_lt_16) {
    for (size_t bs = 1; bs != 16; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x16);
    }
  }

  // One full tile plus a partial remainder (17..31 bytes).
  TEST(X8_LUT__WASMSIMD_X16, batch_gt_16) {
    for (size_t bs = 16 + 1; bs < 2 * 16; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x16);
    }
  }

  // Same-buffer input/output across assorted batch sizes up to five tiles.
  TEST(X8_LUT__WASMSIMD_X16, inplace) {
    for (size_t bs = 1; bs <= 5 * 16; bs += 16 - 1) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__wasmsimd_x16);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1085 
1086 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch size equal to exactly one 32-byte tile.
  TEST(X8_LUT__WASMSIMD_X32, batch_eq_32) {
    LUTMicrokernelTester tester;
    tester.batch_size(32).Test(xnn_x8_lut_ukernel__wasmsimd_x32);
  }

  // Batch sizes that are whole multiples of the tile: 64, 96, ..., 288.
  TEST(X8_LUT__WASMSIMD_X32, batch_div_32) {
    for (size_t bs = 2 * 32; bs < 10 * 32; bs += 32) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x32);
    }
  }

  // Partial-tile batches strictly below one full tile.
  TEST(X8_LUT__WASMSIMD_X32, batch_lt_32) {
    for (size_t bs = 1; bs != 32; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x32);
    }
  }

  // One full tile plus a partial remainder (33..63 bytes).
  TEST(X8_LUT__WASMSIMD_X32, batch_gt_32) {
    for (size_t bs = 32 + 1; bs < 2 * 32; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x32);
    }
  }

  // Same-buffer input/output across assorted batch sizes up to five tiles.
  TEST(X8_LUT__WASMSIMD_X32, inplace) {
    for (size_t bs = 1; bs <= 5 * 32; bs += 32 - 1) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__wasmsimd_x32);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1127 
1128 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch size equal to exactly one 48-byte tile.
  TEST(X8_LUT__WASMSIMD_X48, batch_eq_48) {
    LUTMicrokernelTester tester;
    tester.batch_size(48).Test(xnn_x8_lut_ukernel__wasmsimd_x48);
  }

  // Batch sizes that are whole multiples of the tile: 96, 144, ..., 432.
  TEST(X8_LUT__WASMSIMD_X48, batch_div_48) {
    for (size_t bs = 2 * 48; bs < 10 * 48; bs += 48) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x48);
    }
  }

  // Partial-tile batches strictly below one full tile.
  TEST(X8_LUT__WASMSIMD_X48, batch_lt_48) {
    for (size_t bs = 1; bs != 48; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x48);
    }
  }

  // One full tile plus a partial remainder (49..95 bytes).
  TEST(X8_LUT__WASMSIMD_X48, batch_gt_48) {
    for (size_t bs = 48 + 1; bs < 2 * 48; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x48);
    }
  }

  // Same-buffer input/output across assorted batch sizes up to five tiles.
  TEST(X8_LUT__WASMSIMD_X48, inplace) {
    for (size_t bs = 1; bs <= 5 * 48; bs += 48 - 1) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__wasmsimd_x48);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1169 
1170 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Batch size equal to exactly one 64-byte tile.
  TEST(X8_LUT__WASMSIMD_X64, batch_eq_64) {
    LUTMicrokernelTester tester;
    tester.batch_size(64).Test(xnn_x8_lut_ukernel__wasmsimd_x64);
  }

  // Batch sizes that are whole multiples of the tile: 128, 192, ..., 576.
  TEST(X8_LUT__WASMSIMD_X64, batch_div_64) {
    for (size_t bs = 2 * 64; bs < 10 * 64; bs += 64) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x64);
    }
  }

  // Partial-tile batches strictly below one full tile.
  TEST(X8_LUT__WASMSIMD_X64, batch_lt_64) {
    for (size_t bs = 1; bs != 64; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x64);
    }
  }

  // One full tile plus a partial remainder (65..127 bytes).
  TEST(X8_LUT__WASMSIMD_X64, batch_gt_64) {
    for (size_t bs = 64 + 1; bs < 2 * 64; ++bs) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).Test(xnn_x8_lut_ukernel__wasmsimd_x64);
    }
  }

  // Same-buffer input/output across assorted batch sizes up to five tiles.
  TEST(X8_LUT__WASMSIMD_X64, inplace) {
    for (size_t bs = 1; bs <= 5 * 64; bs += 64 - 1) {
      LUTMicrokernelTester tester;
      tester.batch_size(bs).inplace(true).Test(xnn_x8_lut_ukernel__wasmsimd_x64);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1211