xref: /aosp_15_r20/external/XNNPACK/test/x8-transpose.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/x8-transpose.yaml
8 //   Generator: tools/generate-transpose-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18 
19 
// Single case for the 1x2 scalar-int ukernel: block_width 2, block_height 1,
// input_stride 4, output_stride 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
30 
// Sweeps block_height over [1, 2] and block_width over [1, 4]; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
    }
  }
}
45 
// Single case: block_width 4, block_height 1, input_stride 4, output_stride 1.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(1);
  tester.block_width(4);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
56 
// Sweeps block_width over [3, 4) with block_height 1; input_stride equals the
// width and output_stride is 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(2);
    tester.block_width(width);
    tester.block_height(1);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
69 
// Sweeps block_width over [3, 4) with block_height 2; input_stride equals the
// width and output_stride is 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(2);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
82 
// Single case: block_width 2, block_height 2, input_stride 2, output_stride 7.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(7);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
93 
// Exercises block_height 2 with block_width 5 and input_stride 19;
// output_stride equals the height. Note the width is 5 even though the test
// name says "bw_2".
// The height bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would make this test pass without running anything.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19);
    tester.output_stride(height);
    tester.block_width(5);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
106 
// Exercises block_height 2 with block_width 4 and input_stride 4;
// output_stride equals the height.
// The height bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would make this test pass without running anything.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4);
    tester.output_stride(height);
    tester.block_width(4);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
119 
// Exercises block_height 2 against block_width in [3, 4); input_stride equals
// the width and output_stride equals the height.
// The height bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would skip the inner sweep entirely.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
    }
  }
}
134 
// Single case with input_stride (4) larger than block_width (2);
// block_height 1, output_stride 1.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(1);
  tester.block_width(2);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
145 
// Single case with output_stride (2) larger than block_height (1);
// block_width 2, input_stride 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
156 
// Single case with both strides above the block dimensions: input_stride 4,
// output_stride 2, block_width 2, block_height 1.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
167 
// Single case with input_element_stride 12: block_width 38, block_height 17,
// input_stride 38, output_stride 17.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_17_bw_38_ies_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38);
  tester.output_stride(17);
  tester.block_width(38);
  tester.block_height(17);
  tester.element_size(1);
  tester.input_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
179 
// Single case with output_element_stride 12: block_width 10, block_height 3,
// input_stride 10, output_stride 3.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_3_bw_10_oes_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10);
  tester.output_stride(3);
  tester.block_width(10);
  tester.block_height(3);
  tester.element_size(1);
  tester.output_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
191 
// Single case with input_element_stride 18 and output_element_stride 14:
// block_width 46, block_height 7, input_stride 51, output_stride 13.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_7_bw_46_ies_18_oes_14) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51);
  tester.output_stride(13);
  tester.block_width(46);
  tester.block_height(7);
  tester.element_size(1);
  tester.input_element_stride(18);
  tester.output_element_stride(14);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
204 
// Single case for the 1x4 scalar-int ukernel: block_width 4, block_height 1,
// input_stride 8, output_stride 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
215 
// Sweeps block_height over [1, 2] and block_width over [1, 8]; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_2_bw_1_8) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
    }
  }
}
230 
// Single case: block_width 8, block_height 1, input_stride 8, output_stride 1.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(1);
  tester.block_width(8);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
241 
// Sweeps block_width over [5, 8) with block_height 1; input_stride equals the
// width and output_stride is 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(2);
    tester.block_width(width);
    tester.block_height(1);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
254 
// Sweeps block_width over [5, 8) with block_height 2; input_stride equals the
// width and output_stride is 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(2);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
267 
// Single case: block_width 4, block_height 2, input_stride 4, output_stride 7.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(7);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
278 
// Exercises block_height 2 with block_width 7 and input_stride 21;
// output_stride equals the height. Note the width is 7 even though the test
// name says "bw_4".
// The height bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would make this test pass without running anything.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21);
    tester.output_stride(height);
    tester.block_width(7);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
291 
// Exercises block_height 2 with block_width 8 and input_stride 8;
// output_stride equals the height.
// The height bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would make this test pass without running anything.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_2_bw_8) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8);
    tester.output_stride(height);
    tester.block_width(8);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
304 
// Exercises block_height 2 against block_width in [5, 8); input_stride equals
// the width and output_stride equals the height.
// The height bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would skip the inner sweep entirely.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_2_bw_5_8) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
    }
  }
}
319 
// Single case with input_stride (8) larger than block_width (4);
// block_height 1, output_stride 1.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4_is_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(1);
  tester.block_width(4);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
330 
// Single case with output_stride (2) larger than block_height (1);
// block_width 4, input_stride 4.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
341 
// Single case with both strides above the block dimensions: input_stride 8,
// output_stride 2, block_width 4, block_height 1.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4_is_8_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(1);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
352 
// Single case with input_element_stride 12: block_width 76, block_height 17,
// input_stride 76, output_stride 17.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_17_bw_76_ies_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(76);
  tester.output_stride(17);
  tester.block_width(76);
  tester.block_height(17);
  tester.element_size(1);
  tester.input_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
364 
// Single case with output_element_stride 12: block_width 20, block_height 3,
// input_stride 20, output_stride 3.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_3_bw_20_oes_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(20);
  tester.output_stride(3);
  tester.block_width(20);
  tester.block_height(3);
  tester.element_size(1);
  tester.output_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
376 
// Single case with input_element_stride 18 and output_element_stride 14:
// block_width 92, block_height 7, input_stride 97, output_stride 13.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_7_bw_92_ies_18_oes_14) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(97);
  tester.output_stride(13);
  tester.block_width(92);
  tester.block_height(7);
  tester.element_size(1);
  tester.input_element_stride(18);
  tester.output_element_stride(14);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
389 
// Single case for the 2x1 scalar-int ukernel: block_width 1, block_height 2,
// input_stride 2, output_stride 4.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(1);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
400 
// Sweeps block_height over [1, 4] and block_width over [1, 2]; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
    }
  }
}
415 
// Single case: block_width 2, block_height 2, input_stride 2, output_stride 2.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
426 
// Exercises block_width 2 with block_height 2; input_stride equals the width
// and output_stride is 4.
// The width bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would make this test pass without running anything.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
439 
// Exercises block_width 2 with block_height 4; input_stride equals the width
// and output_stride is 4.
// The width bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would make this test pass without running anything.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(4);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
452 
// Single case: block_width 1, block_height 4, input_stride 1,
// output_stride 10.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_4_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1);
  tester.output_stride(10);
  tester.block_width(1);
  tester.block_height(4);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
463 
// Sweeps block_height over [3, 4) with block_width 4 and input_stride 18;
// output_stride equals the height. Note the width is 4 even though the test
// name says "bw_1".
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(18);
    tester.output_stride(height);
    tester.block_width(4);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
476 
// Sweeps block_height over [3, 4) with block_width 2 and input_stride 2;
// output_stride equals the height.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(2);
    tester.output_stride(height);
    tester.block_width(2);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
489 
// Exercises block_height in [3, 4) against block_width 2; input_stride equals
// the width and output_stride equals the height.
// The width bound is inclusive on purpose: a `< 2` bound starting at 2 is an
// empty range, which would make this test pass without running anything.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
    }
  }
}
504 
// Single case with input_stride (2) larger than block_width (1);
// block_height 2, output_stride 2.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(2);
  tester.block_width(1);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
515 
// Single case with output_stride (4) larger than block_height (2);
// block_width 1, input_stride 1.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1);
  tester.output_stride(4);
  tester.block_width(1);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
526 
// Single case with both strides above the block dimensions: input_stride 2,
// output_stride 4, block_width 1, block_height 2.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(1);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
537 
// Single case with input_element_stride 12: block_width 19, block_height 34,
// input_stride 19, output_stride 34.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_34_bw_19_ies_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(19);
  tester.output_stride(34);
  tester.block_width(19);
  tester.block_height(34);
  tester.element_size(1);
  tester.input_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
549 
// Single case with output_element_stride 12: block_width 5, block_height 6,
// input_stride 5, output_stride 6.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_6_bw_5_oes_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(5);
  tester.output_stride(6);
  tester.block_width(5);
  tester.block_height(6);
  tester.element_size(1);
  tester.output_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
561 
// Single case with input_element_stride 18 and output_element_stride 14:
// block_width 23, block_height 14, input_stride 28, output_stride 20.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_14_bw_23_ies_18_oes_14) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(28);
  tester.output_stride(20);
  tester.block_width(23);
  tester.block_height(14);
  tester.element_size(1);
  tester.input_element_stride(18);
  tester.output_element_stride(14);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
574 
// Single case for the 2x2 scalar-int ukernel: block_width 2, block_height 2,
// input_stride 4, output_stride 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
585 
// Sweeps block_height over [1, 4] and block_width over [1, 4]; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
    }
  }
}
600 
// Single case: block_width 4, block_height 2, input_stride 4, output_stride 2.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
611 
// Sweeps block_width over [3, 4) with block_height 2; input_stride equals the
// width and output_stride is 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
624 
// Sweeps block_width over [3, 4) with block_height 4; input_stride equals the
// width and output_stride is 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(4);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
637 
// Single case: block_width 2, block_height 4, input_stride 2,
// output_stride 10.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(10);
  tester.block_width(2);
  tester.block_height(4);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
648 
// Sweeps block_height over [3, 4) with block_width 5 and input_stride 19;
// output_stride equals the height. Note the width is 5 even though the test
// name says "bw_2".
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19);
    tester.output_stride(height);
    tester.block_width(5);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
661 
// Sweeps block_height over [3, 4) with block_width 4 and input_stride 4;
// output_stride equals the height.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4);
    tester.output_stride(height);
    tester.block_width(4);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
674 
// Sweeps block_height over [3, 4) and block_width over [3, 4); input_stride
// equals the width and output_stride equals the height.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
    }
  }
}
689 
// Single case with input_stride (4) larger than block_width (2);
// block_height 2, output_stride 2.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(2);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
700 
// Single case with output_stride (4) larger than block_height (2);
// block_width 2, input_stride 2.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
711 
// Single case with both strides above the block dimensions: input_stride 4,
// output_stride 4, block_width 2, block_height 2.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
722 
// Single case with input_element_stride 12: block_width 38, block_height 34,
// input_stride 38, output_stride 34.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_34_bw_38_ies_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38);
  tester.output_stride(34);
  tester.block_width(38);
  tester.block_height(34);
  tester.element_size(1);
  tester.input_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
734 
// Single case with output_element_stride 12: block_width 10, block_height 6,
// input_stride 10, output_stride 6.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_6_bw_10_oes_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10);
  tester.output_stride(6);
  tester.block_width(10);
  tester.block_height(6);
  tester.element_size(1);
  tester.output_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
746 
// Single case with input_element_stride 18 and output_element_stride 14:
// block_width 46, block_height 14, input_stride 51, output_stride 20.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_14_bw_46_ies_18_oes_14) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51);
  tester.output_stride(20);
  tester.block_width(46);
  tester.block_height(14);
  tester.element_size(1);
  tester.input_element_stride(18);
  tester.output_element_stride(14);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
759 
// Single case for the 2x4 scalar-int ukernel: block_width 4, block_height 2,
// input_stride 8, output_stride 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
770 
// Sweeps block_height over [1, 4] and block_width over [1, 8]; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_1_4_bw_1_8) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
    }
  }
}
785 
// Single case: block_width 8, block_height 2, input_stride 8, output_stride 2.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(2);
  tester.block_width(8);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
796 
// Sweeps block_width over [5, 8) with block_height 2; input_stride equals the
// width and output_stride is 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(2);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
809 
// Sweeps block_width over [5, 8) with block_height 4; input_stride equals the
// width and output_stride is 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width);
    tester.output_stride(4);
    tester.block_width(width);
    tester.block_height(4);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
822 
// Single case: block_width 4, block_height 4, input_stride 4,
// output_stride 10.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_4_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(10);
  tester.block_width(4);
  tester.block_height(4);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
833 
// Sweeps block_height over [3, 4) with block_width 7 and input_stride 21;
// output_stride equals the height. Note the width is 7 even though the test
// name says "bw_4".
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(21);
    tester.output_stride(height);
    tester.block_width(7);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
846 
// Sweeps block_height over [3, 4) with block_width 8 and input_stride 8;
// output_stride equals the height.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_3_4_bw_8) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(8);
    tester.output_stride(height);
    tester.block_width(8);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
859 
// Sweeps block_height over [3, 4) and block_width over [5, 8); input_stride
// equals the width and output_stride equals the height.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_3_4_bw_5_8) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
    }
  }
}
874 
// Single case with input_stride (8) larger than block_width (4);
// block_height 2, output_stride 2.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4_is_8) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(2);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
885 
// Single case with output_stride (4) larger than block_height (2);
// block_width 4, input_stride 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4);
  tester.output_stride(4);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
896 
// Single case with both strides above the block dimensions: input_stride 8,
// output_stride 4, block_width 4, block_height 2.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4_is_8_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(8);
  tester.output_stride(4);
  tester.block_width(4);
  tester.block_height(2);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
907 
// Single case with input_element_stride 12: block_width 76, block_height 34,
// input_stride 76, output_stride 34.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_34_bw_76_ies_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(76);
  tester.output_stride(34);
  tester.block_width(76);
  tester.block_height(34);
  tester.element_size(1);
  tester.input_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
919 
// Single case with output_element_stride 12: block_width 20, block_height 6,
// input_stride 20, output_stride 6.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_6_bw_20_oes_12) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(20);
  tester.output_stride(6);
  tester.block_width(20);
  tester.block_height(6);
  tester.element_size(1);
  tester.output_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
931 
// Single case with input_element_stride 18 and output_element_stride 14:
// block_width 92, block_height 14, input_stride 97, output_stride 20.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_14_bw_92_ies_18_oes_14) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(97);
  tester.output_stride(20);
  tester.block_width(92);
  tester.block_height(14);
  tester.element_size(1);
  tester.input_element_stride(18);
  tester.output_element_stride(14);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
944 
// Single case for the 4x1 scalar-int ukernel: block_width 1, block_height 4,
// input_stride 2, output_stride 8.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(8);
  tester.block_width(1);
  tester.block_height(4);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
955 
// Sweeps block_height over [1, 8] and block_width over [1, 2]; input_stride is
// 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
    }
  }
}
970 
// Single case: block_width 2, block_height 4, input_stride 2, output_stride 4.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2);
  tester.output_stride(4);
  tester.block_width(2);
  tester.block_height(4);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
981 
// 4x1 scalar-int kernel: block height 4 with the block width (also used as
// the input stride) drawn from [2, 2).
// NOTE: that range is empty, so the loop body never executes and this test
// performs no checks as written.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_2_2) {
  for (size_t bw = 2; bw < 2; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(4)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
994 
// 4x1 scalar-int kernel: block height 8 with the block width (also used as
// the input stride) drawn from [2, 2).
// NOTE: that range is empty, so the loop body never executes and this test
// performs no checks as written.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_8_bw_2_2) {
  for (size_t bw = 2; bw < 2; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
1007 
// 4x1 scalar-int kernel: block of height 8 and width 1, with input stride 1
// and output stride 16.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_8_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(16)
      .block_width(1).block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1018 
// 4x1 scalar-int kernel: block heights 5, 6 and 7, each also used as the
// output stride; input stride is fixed at 18.
// Note: the block width configured here is 4, not the 1 the test name suggests.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_5_8_bw_1) {
  for (size_t bh = 5; bh < 8; bh++) {
    TransposeMicrokernelTester()
        .input_stride(18).output_stride(bh)
        .block_width(4).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
1031 
// 4x1 scalar-int kernel: block heights 5, 6 and 7 (each also used as the
// output stride) at a fixed block width and input stride of 2.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_5_8_bw_2) {
  for (size_t bh = 5; bh < 8; bh++) {
    TransposeMicrokernelTester()
        .input_stride(2).output_stride(bh)
        .block_width(2).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
1044 
// 4x1 scalar-int kernel: block heights 5, 6 and 7 crossed with block widths
// drawn from [2, 2).
// NOTE: the width range is empty, so the inner loop body never executes and
// this test performs no checks as written.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_5_8_bw_2_2) {
  for (size_t bh = 5; bh < 8; bh++) {
    for (size_t bw = 2; bw < 2; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
    }
  }
}
1059 
// 4x1 scalar-int kernel: block of height 4 and width 1 where the input
// stride (2) exceeds the block width; output stride is 4.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1070 
// 4x1 scalar-int kernel: block of height 4 and width 1 where the output
// stride (8) exceeds the block height; input stride is 1.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(8)
      .block_width(1).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1081 
// 4x1 scalar-int kernel: block of height 4 and width 1 with both strides
// larger than the block extents (input stride 2, output stride 8).
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(8)
      .block_width(1).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1092 
// 4x1 scalar-int kernel: block of height 68 and width 19 (input stride 19,
// output stride 68) with an input element stride of 12.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_68_bw_19_ies_12) {
  TransposeMicrokernelTester()
      .input_stride(19).output_stride(68)
      .block_width(19).block_height(68)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1104 
// 4x1 scalar-int kernel: block of height 12 and width 5 (input stride 5,
// output stride 12) with an output element stride of 12.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_12_bw_5_oes_12) {
  TransposeMicrokernelTester()
      .input_stride(5).output_stride(12)
      .block_width(5).block_height(12)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1116 
// 4x1 scalar-int kernel: block of height 28 and width 23 (input stride 28,
// output stride 34) with input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_28_bw_23_ies_18_oes_14) {
  TransposeMicrokernelTester()
      .input_stride(28).output_stride(34)
      .block_width(23).block_height(28)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1129 
// 4x2 scalar-int kernel: block of height 4 and width 2, with input stride 4
// and output stride 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1140 
// 4x2 scalar-int kernel: sweeps every block height in [1, 8] against every
// block width in [1, 4]; strides are 3x the width (input) and 7x the height
// (output).
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_1_8_bw_1_4) {
  for (size_t bh = 1; bh <= 8; bh++) {
    for (size_t bw = 1; bw <= 4; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1155 
// 4x2 scalar-int kernel: square block of height 4 and width 4, with both
// strides equal to 4.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1166 
// 4x2 scalar-int kernel: block height 4 with block widths in [3, 4), i.e.
// only width 3; the width doubles as the input stride, output stride is 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_3_4) {
  for (size_t bw = 3; bw < 4; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(4)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1179 
// 4x2 scalar-int kernel: block height 8 with block widths in [3, 4), i.e.
// only width 3; the width doubles as the input stride, output stride is 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_8_bw_3_4) {
  for (size_t bw = 3; bw < 4; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1192 
// 4x2 scalar-int kernel: block of height 8 and width 2, with input stride 2
// and output stride 16.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_8_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(16)
      .block_width(2).block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1203 
// 4x2 scalar-int kernel: block heights 5, 6 and 7, each also used as the
// output stride; input stride is fixed at 19.
// Note: the block width configured here is 5, not the 2 the test name suggests.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_5_8_bw_2) {
  for (size_t bh = 5; bh < 8; bh++) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(bh)
        .block_width(5).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1216 
// 4x2 scalar-int kernel: block heights 5, 6 and 7 (each also used as the
// output stride) at a fixed block width and input stride of 4.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_5_8_bw_4) {
  for (size_t bh = 5; bh < 8; bh++) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1229 
// 4x2 scalar-int kernel: block heights 5, 6 and 7 crossed with block widths
// in [3, 4) (only width 3); each stride equals the matching block extent.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_5_8_bw_3_4) {
  for (size_t bh = 5; bh < 8; bh++) {
    for (size_t bw = 3; bw < 4; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1244 
// 4x2 scalar-int kernel: block of height 4 and width 2 where the input
// stride (4) exceeds the block width; output stride is 4.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1255 
// 4x2 scalar-int kernel: block of height 4 and width 2 where the output
// stride (8) exceeds the block height; input stride is 2.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1266 
// 4x2 scalar-int kernel: block of height 4 and width 2 with both strides
// larger than the block extents (input stride 4, output stride 8).
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(2).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1277 
// 4x2 scalar-int kernel: block of height 68 and width 38 (input stride 38,
// output stride 68) with an input element stride of 12.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_68_bw_38_ies_12) {
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(68)
      .block_width(38).block_height(68)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1289 
// 4x2 scalar-int kernel: block of height 12 and width 10 (input stride 10,
// output stride 12) with an output element stride of 12.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_12_bw_10_oes_12) {
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(12)
      .block_width(10).block_height(12)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1301 
// 4x2 scalar-int kernel: block of height 28 and width 46 (input stride 51,
// output stride 34) with input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_28_bw_46_ies_18_oes_14) {
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(34)
      .block_width(46).block_height(28)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1314 
// 4x4 scalar-int kernel: block of height 4 and width 4, with input and
// output strides of 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1325 
// 4x4 scalar-int kernel: sweeps every block height in [1, 8] against every
// block width in [1, 8]; strides are 3x the width (input) and 7x the height
// (output).
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_1_8_bw_1_8) {
  for (size_t bh = 1; bh <= 8; bh++) {
    for (size_t bw = 1; bw <= 8; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1340 
// 4x4 scalar-int kernel: block of height 4 and width 8, with input stride 8
// and output stride 4.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1351 
// 4x4 scalar-int kernel: block height 4 with block widths 5, 6 and 7; the
// width doubles as the input stride, output stride is 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_5_8) {
  for (size_t bw = 5; bw < 8; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(4)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1364 
// 4x4 scalar-int kernel: block height 8 with block widths 5, 6 and 7; the
// width doubles as the input stride, output stride is 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_8_bw_5_8) {
  for (size_t bw = 5; bw < 8; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1377 
// 4x4 scalar-int kernel: block of height 8 and width 4, with input stride 4
// and output stride 16.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(16)
      .block_width(4).block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1388 
// 4x4 scalar-int kernel: block heights 5, 6 and 7, each also used as the
// output stride; input stride is fixed at 21.
// Note: the block width configured here is 7, not the 4 the test name suggests.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_5_8_bw_4) {
  for (size_t bh = 5; bh < 8; bh++) {
    TransposeMicrokernelTester()
        .input_stride(21).output_stride(bh)
        .block_width(7).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1401 
// 4x4 scalar-int kernel: block heights 5, 6 and 7 (each also used as the
// output stride) at a fixed block width and input stride of 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_5_8_bw_8) {
  for (size_t bh = 5; bh < 8; bh++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1414 
// 4x4 scalar-int kernel: block heights 5, 6 and 7 crossed with block widths
// 5, 6 and 7; each stride equals the matching block extent.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_5_8_bw_5_8) {
  for (size_t bh = 5; bh < 8; bh++) {
    for (size_t bw = 5; bw < 8; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1429 
// 4x4 scalar-int kernel: block of height 4 and width 4 where the input
// stride (8) exceeds the block width; output stride is 4.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1440 
// 4x4 scalar-int kernel: block of height 4 and width 4 where the output
// stride (8) exceeds the block height; input stride is 4.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1451 
// 4x4 scalar-int kernel: block of height 4 and width 4 with both strides
// larger than the block extents (input stride 8, output stride 8).
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1462 
// 4x4 scalar-int kernel: block of height 68 and width 76 (input stride 76,
// output stride 68) with an input element stride of 12.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_68_bw_76_ies_12) {
  TransposeMicrokernelTester()
      .input_stride(76).output_stride(68)
      .block_width(76).block_height(68)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1474 
// 4x4 scalar-int kernel: block of height 12 and width 20 (input stride 20,
// output stride 12) with an output element stride of 12.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_12_bw_20_oes_12) {
  TransposeMicrokernelTester()
      .input_stride(20).output_stride(12)
      .block_width(20).block_height(12)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1486 
// 4x4 scalar-int kernel: block of height 28 and width 92 (input stride 97,
// output stride 34) with input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_28_bw_92_ies_18_oes_14) {
  TransposeMicrokernelTester()
      .input_stride(97).output_stride(34)
      .block_width(92).block_height(28)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1499 
1500 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 16 and width 16, with input and output strides of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1512 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): sweeps
// every block height in [1, 32] against every block width in [1, 32];
// strides are 3x the width (input) and 7x the height (output).
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh <= 32; bh++) {
    for (size_t bw = 1; bw <= 32; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
    }
  }
}
1528 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 16 and width 32, with input stride 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(32).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1540 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// height 16 with block widths 17 through 31; the width doubles as the input
// stride, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 17; bw < 32; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(32)
        .block_width(bw).block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1554 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// height 32 with block widths 17 through 31; the width doubles as the input
// stride, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 17; bw < 32; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(32)
        .block_width(bw).block_height(32)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1568 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 32 and width 16, with input stride 16 and output stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(52)
      .block_width(16).block_height(32)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1580 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// heights 17 through 31, each also used as the output stride; input stride
// is fixed at 33.
// Note: the block width configured here is 19, not the 16 the test name
// suggests.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_17_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 17; bh < 32; bh++) {
    TransposeMicrokernelTester()
        .input_stride(33).output_stride(bh)
        .block_width(19).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1594 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// heights 17 through 31 (each also used as the output stride) at a fixed
// block width and input stride of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_17_32_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 17; bh < 32; bh++) {
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(bh)
        .block_width(32).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1608 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// heights 17 through 31 crossed with block widths 17 through 31; each stride
// equals the matching block extent.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 17; bh < 32; bh++) {
    for (size_t bw = 17; bw < 32; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
    }
  }
}
1624 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 16 and width 16 where the input stride (32) exceeds the block
// width; output stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1636 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 16 and width 16 where the output stride (32) exceeds the block
// height; input stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1648 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 16 and width 16 with both strides larger than the block extents
// (input stride 32, output stride 32).
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1660 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 272 and width 304 (input stride 304, output stride 272) with an
// input element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(304).output_stride(272)
      .block_width(304).block_height(272)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1673 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 48 and width 80 (input stride 80, output stride 48) with an output
// element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(80).output_stride(48)
      .block_width(80).block_height(48)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1686 
// 16x16 reuse-mov SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block of
// height 112 and width 368 (input stride 373, output stride 118) with input
// element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(373).output_stride(118)
      .block_width(368).block_height(112)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1700 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1701 
1702 
1703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// of height 16 and width 16, with input and output strides of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1715 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): sweeps
// every block height in [1, 32] against every block width in [1, 32];
// strides are 3x the width (input) and 7x the height (output).
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh <= 32; bh++) {
    for (size_t bw = 1; bw <= 32; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
    }
  }
}
1731 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// of height 16 and width 32, with input stride 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(32).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1743 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// height 16 with block widths 17 through 31; the width doubles as the input
// stride, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 17; bw < 32; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(32)
        .block_width(bw).block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1757 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// height 32 with block widths 17 through 31; the width doubles as the input
// stride, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 17; bw < 32; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(32)
        .block_width(bw).block_height(32)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1771 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// of height 32 and width 16, with input stride 16 and output stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(52)
      .block_width(16).block_height(32)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1783 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// heights 17 through 31, each also used as the output stride; input stride
// is fixed at 33.
// Note: the block width configured here is 19, not the 16 the test name
// suggests.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_17_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 17; bh < 32; bh++) {
    TransposeMicrokernelTester()
        .input_stride(33).output_stride(bh)
        .block_width(19).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1797 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// heights 17 through 31 (each also used as the output stride) at a fixed
// block width and input stride of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_17_32_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 17; bh < 32; bh++) {
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(bh)
        .block_width(32).block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1811 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// heights 17 through 31 crossed with block widths 17 through 31; each stride
// equals the matching block extent.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 17; bh < 32; bh++) {
    for (size_t bw = 17; bw < 32; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
    }
  }
}
1827 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// of height 16 and width 16 where the input stride (32) exceeds the block
// width; output stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1839 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// of height 16 and width 16 where the output stride (32) exceeds the block
// height; input stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1851 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// of height 16 and width 16 with both strides larger than the block extents
// (input stride 32, output stride 32).
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1863 
// 16x16 reuse-switch SSE2 kernel (guarded by TEST_REQUIRES_X86_SSE2): block
// of height 272 and width 304 (input stride 304, output stride 272) with an
// input element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(304).output_stride(272)
      .block_width(304).block_height(272)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1876 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 48 and width 80 with strides equal to the block
  // dimensions; output element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(80).output_stride(48)
    .block_width(80).block_height(48)
    .element_size(1).output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1889 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 112 and width 368 with strides 373/118 (each slightly
  // larger than the matching block dimension), input element stride 18 and
  // output element stride 14, element size 1.
  TransposeMicrokernelTester()
    .input_stride(373).output_stride(118)
    .block_width(368).block_height(112)
    .element_size(1).input_element_stride(18).output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1903 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1904 
1905 
1906 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16) {
  // One 16x16 block, element size 1, with both strides set to 32.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(32)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
1917 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_1_32_bw_1_32) {
  // Sweeps every block height and width from 1 through 32 (inclusive); the
  // input stride is 3x the width and the output stride is 7x the height.
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3).output_stride(height * 7)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
    }
  }
}
1932 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_32) {
  // Block of height 16 and width 32; input stride 32 and output stride 16
  // equal the block width and height respectively.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(16)
    .block_width(32).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
1943 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_17_32) {
  // Fixed block height 16; block widths 17 through 31 (the loop bound
  // excludes 32). Input stride tracks the width, output stride is 32.
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(32)
      .block_width(width).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
1956 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_32_bw_17_32) {
  // Fixed block height 32; block widths 17 through 31 (the loop bound
  // excludes 32). Input stride tracks the width, output stride is 32.
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(32)
      .block_width(width).block_height(32)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
1969 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_32_bw_16) {
  // Block of height 32 and width 16; input stride 16 equals the width,
  // output stride 52 exceeds the height.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(52)
    .block_width(16).block_height(32)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
1980 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_17_32_bw_16) {
  // Block heights 17 through 31 (the loop bound excludes 32) with a fixed
  // block width of 19 and input stride 33; output stride tracks the height.
  // NOTE(review): the case name says bw_16 but block_width is 19 -- confirm
  // against the generator template before relying on the name.
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
      .input_stride(33).output_stride(height)
      .block_width(19).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
1993 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_17_32_bw_32) {
  // Block heights 17 through 31 (the loop bound excludes 32) with a fixed
  // block width and input stride of 32; output stride tracks the height.
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
      .input_stride(32).output_stride(height)
      .block_width(32).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
2006 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_17_32_bw_17_32) {
  // Sweeps block heights and widths 17 through 31 (both loop bounds exclude
  // 32); each stride equals the matching block dimension.
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width).output_stride(height)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
    }
  }
}
2021 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16_is_32) {
  // One 16x16 block, element size 1: input stride 32 exceeds the block width,
  // output stride 16 equals the block height.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(16)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2032 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16_os_32) {
  // One 16x16 block, element size 1: input stride 16 equals the block width,
  // output stride 32 exceeds the block height.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(32)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2043 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16_is_32_os_32) {
  // One 16x16 block, element size 1: both strides (32) exceed the block
  // dimensions.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(32)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2054 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_272_bw_304_ies_12) {
  // Block of height 272 and width 304 with strides equal to the block
  // dimensions; input element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(304).output_stride(272)
    .block_width(304).block_height(272)
    .element_size(1).input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2066 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_48_bw_80_oes_12) {
  // Block of height 48 and width 80 with strides equal to the block
  // dimensions; output element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(80).output_stride(48)
    .block_width(80).block_height(48)
    .element_size(1).output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2078 
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_112_bw_368_ies_18_oes_14) {
  // Block of height 112 and width 368 with strides 373/118 (each slightly
  // larger than the matching block dimension), input element stride 18 and
  // output element stride 14, element size 1.
  TransposeMicrokernelTester()
    .input_stride(373).output_stride(118)
    .block_width(368).block_height(112)
    .element_size(1).input_element_stride(18).output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2091 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2092 
2093 
2094 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16) {
  // One 16x16 block, element size 1, with both strides set to 32.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(32)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2105 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_1_32_bw_1_32) {
  // Sweeps every block height and width from 1 through 32 (inclusive); the
  // input stride is 3x the width and the output stride is 7x the height.
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3).output_stride(height * 7)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
    }
  }
}
2120 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_32) {
  // Block of height 16 and width 32; input stride 32 and output stride 16
  // equal the block width and height respectively.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(16)
    .block_width(32).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2131 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_17_32) {
  // Fixed block height 16; block widths 17 through 31 (the loop bound
  // excludes 32). Input stride tracks the width, output stride is 32.
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(32)
      .block_width(width).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2144 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_32_bw_17_32) {
  // Fixed block height 32; block widths 17 through 31 (the loop bound
  // excludes 32). Input stride tracks the width, output stride is 32.
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(32)
      .block_width(width).block_height(32)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2157 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_32_bw_16) {
  // Block of height 32 and width 16; input stride 16 equals the width,
  // output stride 52 exceeds the height.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(52)
    .block_width(16).block_height(32)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2168 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_17_32_bw_16) {
  // Block heights 17 through 31 (the loop bound excludes 32) with a fixed
  // block width of 19 and input stride 33; output stride tracks the height.
  // NOTE(review): the case name says bw_16 but block_width is 19 -- confirm
  // against the generator template before relying on the name.
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
      .input_stride(33).output_stride(height)
      .block_width(19).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2181 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_17_32_bw_32) {
  // Block heights 17 through 31 (the loop bound excludes 32) with a fixed
  // block width and input stride of 32; output stride tracks the height.
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
      .input_stride(32).output_stride(height)
      .block_width(32).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2194 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_17_32_bw_17_32) {
  // Sweeps block heights and widths 17 through 31 (both loop bounds exclude
  // 32); each stride equals the matching block dimension.
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width).output_stride(height)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
    }
  }
}
2209 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16_is_32) {
  // One 16x16 block, element size 1: input stride 32 exceeds the block width,
  // output stride 16 equals the block height.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(16)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2220 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16_os_32) {
  // One 16x16 block, element size 1: input stride 16 equals the block width,
  // output stride 32 exceeds the block height.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(32)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2231 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16_is_32_os_32) {
  // One 16x16 block, element size 1: both strides (32) exceed the block
  // dimensions.
  TransposeMicrokernelTester()
    .input_stride(32).output_stride(32)
    .block_width(16).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2242 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_272_bw_304_ies_12) {
  // Block of height 272 and width 304 with strides equal to the block
  // dimensions; input element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(304).output_stride(272)
    .block_width(304).block_height(272)
    .element_size(1).input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2254 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_48_bw_80_oes_12) {
  // Block of height 48 and width 80 with strides equal to the block
  // dimensions; output element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(80).output_stride(48)
    .block_width(80).block_height(48)
    .element_size(1).output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2266 
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_112_bw_368_ies_18_oes_14) {
  // Block of height 112 and width 368 with strides 373/118 (each slightly
  // larger than the matching block dimension), input element stride 18 and
  // output element stride 14, element size 1.
  TransposeMicrokernelTester()
    .input_stride(373).output_stride(118)
    .block_width(368).block_height(112)
    .element_size(1).input_element_stride(18).output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2279 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2280 
2281 
2282 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1, with both strides set to 16.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(16)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2294 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps every block height and width from 1 through 16 (inclusive); the
  // input stride is 3x the width and the output stride is 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3).output_stride(height * 7)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
2310 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 8 and width 16; input stride 16 and output stride 8
  // equal the block width and height respectively.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(8)
    .block_width(16).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2322 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed block height 8; block widths 9 through 15 (the loop bound
  // excludes 16). Input stride tracks the width, output stride is 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(16)
      .block_width(width).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2336 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed block height 16; block widths 9 through 15 (the loop bound
  // excludes 16). Input stride tracks the width, output stride is 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(16)
      .block_width(width).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2350 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 16 and width 8; input stride 8 equals the width,
  // output stride 28 exceeds the height.
  TransposeMicrokernelTester()
    .input_stride(8).output_stride(28)
    .block_width(8).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2362 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Block heights 9 through 15 (the loop bound excludes 16) with a fixed
  // block width of 11 and input stride 25; output stride tracks the height.
  // NOTE(review): the case name says bw_8 but block_width is 11 -- confirm
  // against the generator template before relying on the name.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(25).output_stride(height)
      .block_width(11).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2376 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Block heights 9 through 15 (the loop bound excludes 16) with a fixed
  // block width and input stride of 16; output stride tracks the height.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(16).output_stride(height)
      .block_width(16).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2390 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block heights and widths 9 through 15 (both loop bounds exclude
  // 16); each stride equals the matching block dimension.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width).output_stride(height)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
2406 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1: input stride 16 exceeds the block width,
  // output stride 8 equals the block height.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(8)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2418 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1: input stride 8 equals the block width,
  // output stride 16 exceeds the block height.
  TransposeMicrokernelTester()
    .input_stride(8).output_stride(16)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2430 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1: both strides (16) exceed the block
  // dimensions.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(16)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2442 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 136 and width 152 with strides equal to the block
  // dimensions; input element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(152).output_stride(136)
    .block_width(152).block_height(136)
    .element_size(1).input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2455 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 24 and width 40 with strides equal to the block
  // dimensions; output element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(40).output_stride(24)
    .block_width(40).block_height(24)
    .element_size(1).output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2468 
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 56 and width 184 with strides 189/62 (each slightly
  // larger than the matching block dimension), input element stride 18 and
  // output element stride 14, element size 1.
  TransposeMicrokernelTester()
    .input_stride(189).output_stride(62)
    .block_width(184).block_height(56)
    .element_size(1).input_element_stride(18).output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2482 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2483 
2484 
2485 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1, with both strides set to 16.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(16)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2497 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps every block height and width from 1 through 16 (inclusive); the
  // input stride is 3x the width and the output stride is 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3).output_stride(height * 7)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
2513 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 8 and width 16; input stride 16 and output stride 8
  // equal the block width and height respectively.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(8)
    .block_width(16).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2525 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed block height 8; block widths 9 through 15 (the loop bound
  // excludes 16). Input stride tracks the width, output stride is 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(16)
      .block_width(width).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2539 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Fixed block height 16; block widths 9 through 15 (the loop bound
  // excludes 16). Input stride tracks the width, output stride is 16.
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width).output_stride(16)
      .block_width(width).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2553 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 16 and width 8; input stride 8 equals the width,
  // output stride 28 exceeds the height.
  TransposeMicrokernelTester()
    .input_stride(8).output_stride(28)
    .block_width(8).block_height(16)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2565 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Block heights 9 through 15 (the loop bound excludes 16) with a fixed
  // block width of 11 and input stride 25; output stride tracks the height.
  // NOTE(review): the case name says bw_8 but block_width is 11 -- confirm
  // against the generator template before relying on the name.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(25).output_stride(height)
      .block_width(11).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2579 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  // Block heights 9 through 15 (the loop bound excludes 16) with a fixed
  // block width and input stride of 16; output stride tracks the height.
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(16).output_stride(height)
      .block_width(16).block_height(height)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2593 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps block heights and widths 9 through 15 (both loop bounds exclude
  // 16); each stride equals the matching block dimension.
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width).output_stride(height)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
2609 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1: input stride 16 exceeds the block width,
  // output stride 8 equals the block height.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(8)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2621 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1: input stride 8 equals the block width,
  // output stride 16 exceeds the block height.
  TransposeMicrokernelTester()
    .input_stride(8).output_stride(16)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2633 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1: both strides (16) exceed the block
  // dimensions.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(16)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2645 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 136 and width 152 with strides equal to the block
  // dimensions; input element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(152).output_stride(136)
    .block_width(152).block_height(136)
    .element_size(1).input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2658 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 24 and width 40 with strides equal to the block
  // dimensions; output element stride 12 is larger than element size 1.
  TransposeMicrokernelTester()
    .input_stride(40).output_stride(24)
    .block_width(40).block_height(24)
    .element_size(1).output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2671 
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  // Block of height 56 and width 184 with strides 189/62 (each slightly
  // larger than the matching block dimension), input element stride 18 and
  // output element stride 14, element size 1.
  TransposeMicrokernelTester()
    .input_stride(189).output_stride(62)
    .block_width(184).block_height(56)
    .element_size(1).input_element_stride(18).output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2685 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2686 
2687 
2688 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // One 8x8 block, element size 1, with both strides set to 16.
  TransposeMicrokernelTester()
    .input_stride(16).output_stride(16)
    .block_width(8).block_height(8)
    .element_size(1).iterations(1)
    .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2700 
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps every block height and width from 1 through 16 (inclusive); the
  // input stride is 3x the width and the output stride is 7x the height.
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3).output_stride(height * 7)
        .block_width(width).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
2716 
// Single case: block_height 8 x block_width 16; input_stride 16, output_stride 8.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2728 
// Block height fixed at 8; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2742 
// Block height fixed at 16; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2756 
// Single case: block_height 16 x block_width 8; input_stride 8, output_stride 28.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2768 
// Block height runs 9..15 (the strict bound excludes 16); output_stride equals
// the height.
// NOTE(review): the test name says bw_8, yet block_width is 11 and input_stride
// is 25 - confirm against the generator template before changing either.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2782 
// Block height runs 9..15 (the strict bound excludes 16) with output_stride equal
// to the height; block_width and input_stride are both fixed at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2796 
// Sweeps block heights 9..15 against block widths 9..15 (16 excluded on both
// axes). Each stride equals its block dimension: input_stride = width,
// output_stride = height.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
2812 
// 8 x 8 block where input_stride (16) exceeds the block width while
// output_stride (8) matches the block height.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2824 
// 8 x 8 block where output_stride (16) exceeds the block height while
// input_stride (8) matches the block width.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2836 
// 8 x 8 block with input_stride and output_stride both set to 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2848 
// Single case: block_height 136 x block_width 152 with input_element_stride 12.
// output_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2861 
// Single case: block_height 24 x block_width 40 with output_element_stride 12.
// input_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2874 
// Single case: block_height 56 x block_width 184 with both element strides set
// (input 18, output 14); input_stride is 189 and output_stride is 62.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2888 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2889 
2890 
2891 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_height 8 x block_width 8, with input_stride and
// output_stride both 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
2903 
// Sweeps every block height 1..16 against every block width 1..16 (inclusive).
// Strides follow the block: input_stride = width * 3, output_stride = height * 7.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
2919 
// Single case: block_height 8 x block_width 16; input_stride 16, output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
2931 
// Block height fixed at 8; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2945 
// Block height fixed at 16; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2959 
// Single case: block_height 16 x block_width 8; input_stride 8, output_stride 28.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
2971 
// Block height runs 9..15 (the strict bound excludes 16); output_stride equals
// the height.
// NOTE(review): the test name says bw_8, yet block_width is 11 and input_stride
// is 25 - confirm against the generator template before changing either.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2985 
// Block height runs 9..15 (the strict bound excludes 16) with output_stride equal
// to the height; block_width and input_stride are both fixed at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2999 
// Sweeps block heights 9..15 against block widths 9..15 (16 excluded on both
// axes). Each stride equals its block dimension: input_stride = width,
// output_stride = height.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
3015 
// 8 x 8 block where input_stride (16) exceeds the block width while
// output_stride (8) matches the block height.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3027 
// 8 x 8 block where output_stride (16) exceeds the block height while
// input_stride (8) matches the block width.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3039 
// 8 x 8 block with input_stride and output_stride both set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3051 
// Single case: block_height 136 x block_width 152 with input_element_stride 12.
// output_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3064 
// Single case: block_height 24 x block_width 40 with output_element_stride 12.
// input_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3077 
// Single case: block_height 56 x block_width 184 with both element strides set
// (input 18, output 14); input_stride is 189 and output_stride is 62.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3091 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3092 
3093 
3094 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_height 8 x block_width 8, with input_stride and
// output_stride both 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3106 
// Sweeps every block height 1..16 against every block width 1..16 (inclusive).
// Strides follow the block: input_stride = width * 3, output_stride = height * 7.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
3122 
// Single case: block_height 8 x block_width 16; input_stride 16, output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3134 
// Block height fixed at 8; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3148 
// Block height fixed at 16; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3162 
// Single case: block_height 16 x block_width 8; input_stride 8, output_stride 28.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3174 
// Block height runs 9..15 (the strict bound excludes 16); output_stride equals
// the height.
// NOTE(review): the test name says bw_8, yet block_width is 11 and input_stride
// is 25 - confirm against the generator template before changing either.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3188 
// Block height runs 9..15 (the strict bound excludes 16) with output_stride equal
// to the height; block_width and input_stride are both fixed at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3202 
// Sweeps block heights 9..15 against block widths 9..15 (16 excluded on both
// axes). Each stride equals its block dimension: input_stride = width,
// output_stride = height.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
3218 
// 8 x 8 block where input_stride (16) exceeds the block width while
// output_stride (8) matches the block height.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3230 
// 8 x 8 block where output_stride (16) exceeds the block height while
// input_stride (8) matches the block width.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3242 
// 8 x 8 block with input_stride and output_stride both set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3254 
// Single case: block_height 136 x block_width 152 with input_element_stride 12.
// output_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3267 
// Single case: block_height 24 x block_width 40 with output_element_stride 12.
// input_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3280 
// Single case: block_height 56 x block_width 184 with both element strides set
// (input 18, output 14); input_stride is 189 and output_stride is 62.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3294 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3295 
3296 
3297 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_height 8 x block_width 8, with input_stride and
// output_stride both 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3309 
// Sweeps every block height 1..16 against every block width 1..16 (inclusive).
// Strides follow the block: input_stride = width * 3, output_stride = height * 7.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
3325 
// Single case: block_height 8 x block_width 16; input_stride 16, output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3337 
// Block height fixed at 8; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3351 
// Block height fixed at 16; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3365 
// Single case: block_height 16 x block_width 8; input_stride 8, output_stride 28.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3377 
// Block height runs 9..15 (the strict bound excludes 16); output_stride equals
// the height.
// NOTE(review): the test name says bw_8, yet block_width is 11 and input_stride
// is 25 - confirm against the generator template before changing either.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3391 
// Block height runs 9..15 (the strict bound excludes 16) with output_stride equal
// to the height; block_width and input_stride are both fixed at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3405 
// Sweeps block heights 9..15 against block widths 9..15 (16 excluded on both
// axes). Each stride equals its block dimension: input_stride = width,
// output_stride = height.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
3421 
// 8 x 8 block where input_stride (16) exceeds the block width while
// output_stride (8) matches the block height.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3433 
// 8 x 8 block where output_stride (16) exceeds the block height while
// input_stride (8) matches the block width.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3445 
// 8 x 8 block with input_stride and output_stride both set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3457 
// Single case: block_height 136 x block_width 152 with input_element_stride 12.
// output_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3470 
// Single case: block_height 24 x block_width 40 with output_element_stride 12.
// input_element_stride is not set in this test.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3483 
// Single case: block_height 56 x block_width 184 with both element strides set
// (input 18, output 14); input_stride is 189 and output_stride is 62.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3497 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3498 
3499 
3500 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: block_height 8 x block_width 8, with input_stride and
// output_stride both 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3512 
// Sweeps every block height 1..16 against every block width 1..16 (inclusive).
// Strides follow the block: input_stride = width * 3, output_stride = height * 7.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
3528 
// Single case: block_height 8 x block_width 16; input_stride 16, output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3540 
// Block height fixed at 8; block width runs 9..15 (the strict bound excludes 16).
// input_stride equals the width, output_stride stays at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3554 
// Height 16 with widths 9 through 15 (the upper bound 16 is exclusive);
// input_stride follows the width, output_stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3568 
// Height 16, width 8; input_stride equals the width while output_stride
// (28) is larger than the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3580 
// Heights 9 through 15 (the upper bound 16 is exclusive) with
// output_stride following the height.
// NOTE(review): the test name says bw_8, but the block width used here is
// 11 (with input_stride 25) - confirm against the generator that this is
// intended.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3594 
// Width 16 with heights 9 through 15 (the upper bound 16 is exclusive);
// input_stride equals the width, output_stride follows the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3608 
// Every height/width pair from 9 through 15 (16 is exclusive on both
// axes); input_stride equals the width and output_stride the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
3624 
// 8x8 block where only input_stride (16) exceeds the block size;
// output_stride is 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3636 
// 8x8 block where only output_stride (16) exceeds the block size;
// input_stride is 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3648 
// 8x8 block with input_stride and output_stride both set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3660 
// Large block (height 136, width 152) with element_size 1 and an
// input_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3673 
// Block of height 24 and width 40 with element_size 1 and an
// output_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3686 
// Block of height 56 and width 184 with input_stride 189 and
// output_stride 62, combining input_element_stride 18 with
// output_element_stride 14 (element_size 1).
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3700 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3701 
3702 
3703 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 16x16 block (the size in the kernel name); input_stride and
// output_stride are both 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3715 
// Sweeps every height/width pair from 1 through 32 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
    }
  }
}
3731 
// Height 16, width 32; input_stride equals the width and output_stride
// equals the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(32).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3743 
// Height 16 with widths 17 through 31 (the upper bound 32 is exclusive);
// input_stride follows the width, output_stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3757 
// Height 32 with widths 17 through 31 (the upper bound 32 is exclusive);
// input_stride follows the width, output_stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(32)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3771 
// Height 32, width 16; input_stride equals the width while output_stride
// (52) is larger than the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(52)
      .block_width(16).block_height(32)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3783 
// Heights 17 through 31 (the upper bound 32 is exclusive) with
// output_stride following the height.
// NOTE(review): the test name says bw_16, but the block width used here is
// 19 (with input_stride 33) - confirm against the generator that this is
// intended.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_17_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(33).output_stride(height)
        .block_width(19).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3797 
// Width 32 with heights 17 through 31 (the upper bound 32 is exclusive);
// input_stride equals the width, output_stride follows the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_17_32_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(height)
        .block_width(32).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3811 
// Every height/width pair from 17 through 31 (32 is exclusive on both
// axes); input_stride equals the width and output_stride the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
    }
  }
}
3827 
// 16x16 block where only input_stride (32) exceeds the block size;
// output_stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3839 
// 16x16 block where only output_stride (32) exceeds the block size;
// input_stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3851 
// 16x16 block with input_stride and output_stride both set to 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3863 
// Large block (height 272, width 304) with element_size 1 and an
// input_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(304).output_stride(272)
      .block_width(304).block_height(272)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3876 
// Block of height 48 and width 80 with element_size 1 and an
// output_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(80).output_stride(48)
      .block_width(80).block_height(48)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3889 
// Block of height 112 and width 368 with input_stride 373 and
// output_stride 118, combining input_element_stride 18 with
// output_element_stride 14 (element_size 1).
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(373).output_stride(118)
      .block_width(368).block_height(112)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3903 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3904 
3905 
3906 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 16x16 block (the size in the kernel name); input_stride and
// output_stride are both 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
3918 
// Sweeps every height/width pair from 1 through 32 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
    }
  }
}
3934 
// Height 16, width 32; input_stride equals the width and output_stride
// equals the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(32).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
3946 
// Height 16 with widths 17 through 31 (the upper bound 32 is exclusive);
// input_stride follows the width, output_stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
3960 
// Height 32 with widths 17 through 31 (the upper bound 32 is exclusive);
// input_stride follows the width, output_stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(32)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
3974 
// Height 32, width 16; input_stride equals the width while output_stride
// (52) is larger than the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(52)
      .block_width(16).block_height(32)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
3986 
// Heights 17 through 31 (the upper bound 32 is exclusive) with
// output_stride following the height.
// NOTE(review): the test name says bw_16, but the block width used here is
// 19 (with input_stride 33) - confirm against the generator that this is
// intended.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_17_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(33).output_stride(height)
        .block_width(19).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
4000 
// Width 32 with heights 17 through 31 (the upper bound 32 is exclusive);
// input_stride equals the width, output_stride follows the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_17_32_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(height)
        .block_width(32).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
4014 
// Every height/width pair from 17 through 31 (32 is exclusive on both
// axes); input_stride equals the width and output_stride the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
    }
  }
}
4030 
// 16x16 block where only input_stride (32) exceeds the block size;
// output_stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4042 
// 16x16 block where only output_stride (32) exceeds the block size;
// input_stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4054 
// 16x16 block with input_stride and output_stride both set to 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4066 
// Large block (height 272, width 304) with element_size 1 and an
// input_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(304).output_stride(272)
      .block_width(304).block_height(272)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4079 
// Block of height 48 and width 80 with element_size 1 and an
// output_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(80).output_stride(48)
      .block_width(80).block_height(48)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4092 
// Block of height 112 and width 368 with input_stride 373 and
// output_stride 118, combining input_element_stride 18 with
// output_element_stride 14 (element_size 1).
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(373).output_stride(118)
      .block_width(368).block_height(112)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4106 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4107 
4108 
4109 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 16x16 block (the size in the kernel name); input_stride and
// output_stride are both 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4121 
// Sweeps every height/width pair from 1 through 32 inclusive, with
// input_stride = 3 * width and output_stride = 7 * height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
    }
  }
}
4137 
// Height 16, width 32; input_stride equals the width and output_stride
// equals the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(32).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4149 
// Height 16 with widths 17 through 31 (the upper bound 32 is exclusive);
// input_stride follows the width, output_stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4163 
// Height 32 with widths 17 through 31 (the upper bound 32 is exclusive);
// input_stride follows the width, output_stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(32)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4177 
// Height 32, width 16; input_stride equals the width while output_stride
// (52) is larger than the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(52)
      .block_width(16).block_height(32)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4189 
// Heights 17 through 31 (the upper bound 32 is exclusive) with
// output_stride following the height.
// NOTE(review): the test name says bw_16, but the block width used here is
// 19 (with input_stride 33) - confirm against the generator that this is
// intended.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_17_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(33).output_stride(height)
        .block_width(19).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4203 
// Width 32 with heights 17 through 31 (the upper bound 32 is exclusive);
// input_stride equals the width, output_stride follows the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_17_32_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(height)
        .block_width(32).block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4217 
// Every height/width pair from 17 through 31 (32 is exclusive on both
// axes); input_stride equals the width and output_stride the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
    }
  }
}
4233 
// 16x16 block where only input_stride (32) exceeds the block size;
// output_stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4245 
// 16x16 block where only output_stride (32) exceeds the block size;
// input_stride is 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4257 
// 16x16 block with input_stride and output_stride both set to 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4269 
// Large block (height 272, width 304) with element_size 1 and an
// input_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(304).output_stride(272)
      .block_width(304).block_height(272)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4282 
// Block of height 48 and width 80 with element_size 1 and an
// output_element_stride of 12; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(80).output_stride(48)
      .block_width(80).block_height(48)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4295 
// Block of height 112 and width 368 with input_stride 373 and
// output_stride 118, combining input_element_stride 18 with
// output_element_stride 14 (element_size 1).
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(373).output_stride(118)
      .block_width(368).block_height(112)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4309 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4310