xref: /aosp_15_r20/external/XNNPACK/test/x64-transpose.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/x64-transpose.yaml
8 //   Generator: tools/generate-transpose-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18 
19 
// Block of height 1 and width 2; input stride 4, output stride 2.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
30 
// Sweeps block heights 1..2 and block widths 1..4 (both inclusive). The input
// stride is three times the width and the output stride seven times the height.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
    }
  }
}
45 
// Block of height 1 and width 4; input stride 4, output stride 1.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(1)
      .block_width(4).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
56 
// Block height 1 with block widths in [3, 4), i.e. width 3 only. The input
// stride equals the width; the output stride is 2.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(1)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
69 
// Block height 2 with block widths in [3, 4), i.e. width 3 only. The input
// stride equals the width; the output stride is 2.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(2)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
82 
// Block of height 2 and width 2; input stride 2, output stride 7.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(7)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
93 
// Block height 2 (output stride equal to the height) with block width 5 and
// input stride 19.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
106 
// Block height 2 (output stride equal to the height) with block width 4 and
// input stride 4.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
119 
// Block height 2 combined with block widths in [3, 4), i.e. width 3 only.
// Strides equal the corresponding block dimensions.
// The outer bound is inclusive on purpose: an exclusive `< 2` bound starting
// at 2 never executes the body, so the test would pass without calling the
// kernel. This file is generated (see the header comment); apply the same
// bound in the generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
    }
  }
}
134 
// Input stride (4) larger than the block width (2); output stride 1 equals
// the block height.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(1)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
145 
// Output stride (2) larger than the block height (1); input stride 2 equals
// the block width.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
156 
// Both strides exceed the block dimensions: input stride 4 for width 2 and
// output stride 2 for height 1.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
167 
// 17 x 38 (height x width) block with input_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_17_bw_38_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(17)
      .block_width(38).block_height(17)
      .element_size(8).input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
179 
// 3 x 10 (height x width) block with output_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_3_bw_10_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(3)
      .block_width(10).block_height(3)
      .element_size(8).output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
191 
// 7 x 46 (height x width) block with both element strides set (input 25,
// output 21) and strides larger than the block (input 51, output 13).
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_7_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(13)
      .block_width(46).block_height(7)
      .element_size(8)
      .input_element_stride(25).output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
204 
// Block of height 1 and width 2; input stride 4, output stride 2.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
215 
// Sweeps block heights 1..2 and block widths 1..4 (both inclusive). The input
// stride is three times the width and the output stride seven times the height.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
    }
  }
}
230 
// Block of height 1 and width 4; input stride 4, output stride 1.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(1)
      .block_width(4).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
241 
// Block height 1 with block widths in [3, 4), i.e. width 3 only. The input
// stride equals the width; the output stride is 2.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(1)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
254 
// Block height 2 with block widths in [3, 4), i.e. width 3 only. The input
// stride equals the width; the output stride is 2.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(2)
        .block_width(width).block_height(2)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
267 
// Block of height 2 and width 2; input stride 2, output stride 7.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(7)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
278 
// Block height 2 (output stride equal to the height) with block width 5 and
// input stride 19.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
291 
// Block height 2 (output stride equal to the height) with block width 4 and
// input stride 4.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
304 
// Block height 2 combined with block widths in [3, 4), i.e. width 3 only.
// Strides equal the corresponding block dimensions.
// The outer bound is inclusive on purpose: an exclusive `< 2` bound starting
// at 2 never executes the body, so the test would pass without calling the
// kernel. This file is generated (see the header comment); apply the same
// bound in the generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
    }
  }
}
319 
// Input stride (4) larger than the block width (2); output stride 1 equals
// the block height.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(1)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
330 
// Output stride (2) larger than the block height (1); input stride 2 equals
// the block width.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
341 
// Both strides exceed the block dimensions: input stride 4 for width 2 and
// output stride 2 for height 1.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(1)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
352 
// 17 x 38 (height x width) block with input_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_17_bw_38_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(17)
      .block_width(38).block_height(17)
      .element_size(8).input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
364 
// 3 x 10 (height x width) block with output_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_3_bw_10_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(3)
      .block_width(10).block_height(3)
      .element_size(8).output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
376 
// 7 x 46 (height x width) block with both element strides set (input 25,
// output 21) and strides larger than the block (input 51, output 13).
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_7_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(13)
      .block_width(46).block_height(7)
      .element_size(8)
      .input_element_stride(25).output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
389 
// Block of height 2 and width 1; input stride 2, output stride 4.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
400 
// Sweeps block heights 1..4 and block widths 1..2 (both inclusive). The input
// stride is three times the width and the output stride seven times the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
    }
  }
}
415 
// Block of height 2 and width 2; input stride 2, output stride 2.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
426 
// Block height 2 with block width 2 (input stride equal to the width) and
// output stride 4.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
439 
// Block height 4 with block width 2 (input stride equal to the width) and
// output stride 4.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
452 
// Block of height 4 and width 1; input stride 1, output stride 10.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_4_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(10)
      .block_width(1).block_height(4)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
463 
// Block heights in [3, 4), i.e. height 3 only, with block width 4 and input
// stride 18. The output stride equals the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(18).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
476 
// Block heights in [3, 4), i.e. height 3 only, with block width 2 and input
// stride 2. The output stride equals the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(2).output_stride(height)
        .block_width(2).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
489 
// Block heights in [3, 4), i.e. height 3 only, combined with block width 2.
// Strides equal the corresponding block dimensions.
// The inner bound is inclusive on purpose: an exclusive `< 2` bound starting
// at 2 never executes the body, so the test would pass without calling the
// kernel. This file is generated (see the header comment); apply the same
// bound in the generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
    }
  }
}
504 
// Input stride (2) larger than the block width (1); output stride 2 equals
// the block height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
515 
// Output stride (4) larger than the block height (2); input stride 1 equals
// the block width.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
526 
// Both strides exceed the block dimensions: input stride 2 for width 1 and
// output stride 4 for height 2.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
537 
// 34 x 19 (height x width) block with input_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_34_bw_19_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(19).output_stride(34)
      .block_width(19).block_height(34)
      .element_size(8).input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
549 
// 6 x 5 (height x width) block with output_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_6_bw_5_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(5).output_stride(6)
      .block_width(5).block_height(6)
      .element_size(8).output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
561 
// 14 x 23 (height x width) block with both element strides set (input 25,
// output 21) and strides larger than the block (input 28, output 20).
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_14_bw_23_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(28).output_stride(20)
      .block_width(23).block_height(14)
      .element_size(8)
      .input_element_stride(25).output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
574 
// Block of height 2 and width 1; input stride 2, output stride 4.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
585 
// Sweeps block heights 1..4 and block widths 1..2 (both inclusive). The input
// stride is three times the width and the output stride seven times the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
    }
  }
}
600 
// Block of height 2 and width 2; input stride 2, output stride 2.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
611 
// Block height 2 with block width 2 (input stride equal to the width) and
// output stride 4.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
624 
// Block height 4 with block width 2 (input stride equal to the width) and
// output stride 4.
// The loop bound is inclusive on purpose: an exclusive `< 2` bound starting at
// 2 never executes the body, so the test would pass without calling the kernel.
// This file is generated (see the header comment); apply the same bound in the
// generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
637 
// Block of height 4 and width 1; input stride 1, output stride 10.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_4_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(10)
      .block_width(1).block_height(4)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
648 
// Block heights in [3, 4), i.e. height 3 only, with block width 4 and input
// stride 18. The output stride equals the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(18).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
661 
// Block heights in [3, 4), i.e. height 3 only, with block width 2 and input
// stride 2. The output stride equals the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(2).output_stride(height)
        .block_width(2).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
674 
// Block heights in [3, 4), i.e. height 3 only, combined with block width 2.
// Strides equal the corresponding block dimensions.
// The inner bound is inclusive on purpose: an exclusive `< 2` bound starting
// at 2 never executes the body, so the test would pass without calling the
// kernel. This file is generated (see the header comment); apply the same
// bound in the generator template so regeneration keeps it.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
    }
  }
}
689 
// Input stride (2) larger than the block width (1); output stride 2 equals
// the block height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(2)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
700 
// Output stride (4) larger than the block height (2); input stride 1 equals
// the block width.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester()
      .input_stride(1).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
711 
// Both strides exceed the block dimensions: input stride 2 for width 1 and
// output stride 4 for height 2.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(1).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
722 
// 34 x 19 (height x width) block with input_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_34_bw_19_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(19).output_stride(34)
      .block_width(19).block_height(34)
      .element_size(8).input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
734 
// 6 x 5 (height x width) block with output_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_6_bw_5_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(5).output_stride(6)
      .block_width(5).block_height(6)
      .element_size(8).output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
746 
// 14 x 23 (height x width) block with both element strides set (input 25,
// output 21) and strides larger than the block (input 28, output 20).
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_14_bw_23_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(28).output_stride(20)
      .block_width(23).block_height(14)
      .element_size(8)
      .input_element_stride(25).output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
759 
// Block of height 2 and width 2; input stride 4, output stride 4.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
770 
// Sweeps block heights 1..4 and block widths 1..4 (both inclusive). The input
// stride is three times the width and the output stride seven times the height.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
    }
  }
}
785 
// Block of height 2 and width 4; input stride 4, output stride 2.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
796 
// Block height 2 with block widths in [3, 4), i.e. width 3 only. The input
// stride equals the width; the output stride is 4.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(2)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
809 
// Block height 4 with block widths in [3, 4), i.e. width 3 only. The input
// stride equals the width; the output stride is 4.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
822 
// Block of height 4 and width 2; input stride 2, output stride 10.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
833 
// Block heights in [3, 4), i.e. height 3 only, with block width 5 and input
// stride 19. The output stride equals the height.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(height)
        .block_width(5).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
846 
// Block heights in [3, 4), i.e. height 3 only, with block width 4 and input
// stride 4. The output stride equals the height.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .element_size(8).iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
859 
// Block heights and widths both in [3, 4), i.e. a single 3 x 3 case. Strides
// equal the corresponding block dimensions.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
    }
  }
}
874 
// Input stride (4) larger than the block width (2); output stride 2 equals
// the block height.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
885 
// Output stride (4) larger than the block height (2); input stride 2 equals
// the block width.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
896 
// Both strides exceed the block dimensions: input stride 4 for width 2 and
// output stride 4 for height 2.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
907 
// 34 x 38 (height x width) block with input_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_34_bw_38_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(8).input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
919 
// 6 x 10 (height x width) block with output_element_stride 19 while the
// element size stays 8; strides equal the block dimensions.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_6_bw_10_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(8).output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
931 
// 14 x 46 (height x width) block with both element strides set (input 25,
// output 21) and strides larger than the block (input 51, output 20).
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_14_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(8)
      .input_element_stride(25).output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
944 
// Block of height 2 and width 2; input stride 4, output stride 4.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8).iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
955 
// Sweeps block heights 1..4 and block widths 1..4 (both inclusive). The input
// stride is three times the width and the output stride seven times the height.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(8).iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
    }
  }
}
970 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int: block_width 4,
// block_height 2, input_stride 4, output_stride 2.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_2_bw_4)971 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_4) {
972   TransposeMicrokernelTester()
973     .input_stride(4)
974     .output_stride(2)
975     .block_width(4)
976     .block_height(2)
977     .element_size(8)
978     .iterations(1)
979     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
980 }
981 
// Runs xnn_x64_transposec_ukernel__2x2_scalar_int with block_height 2 and
// output_stride 4; the loop variable supplies block_width and input_stride.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_2_bw_3_4)982 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_3_4) {
983   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
984     TransposeMicrokernelTester()
985       .input_stride(i)
986       .output_stride(4)
987       .block_width(i)
988       .block_height(2)
989       .element_size(8)
990       .iterations(1)
991       .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
992   }
993 }
994 
// Runs xnn_x64_transposec_ukernel__2x2_scalar_int with block_height 4 and
// output_stride 4; the loop variable supplies block_width and input_stride.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_4_bw_3_4)995 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_4_bw_3_4) {
996   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
997     TransposeMicrokernelTester()
998       .input_stride(i)
999       .output_stride(4)
1000       .block_width(i)
1001       .block_height(4)
1002       .element_size(8)
1003       .iterations(1)
1004       .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1005   }
1006 }
1007 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int: block_width 2,
// block_height 4, input_stride 2, output_stride 10.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_4_bw_2)1008 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_4_bw_2) {
1009   TransposeMicrokernelTester()
1010     .input_stride(2)
1011     .output_stride(10)
1012     .block_width(2)
1013     .block_height(4)
1014     .element_size(8)
1015     .iterations(1)
1016     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1017 }
1018 
// Runs xnn_x64_transposec_ukernel__2x2_scalar_int with the loop variable as
// block_height and output_stride, and a fixed input_stride of 19.
// NOTE(review): the test name says bw_2 but block_width is 5 — confirm against
// the generator template (tools/generate-transpose-test.py).
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_3_4_bw_2)1019 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_3_4_bw_2){
1020   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
1021     TransposeMicrokernelTester()
1022       .input_stride(19)
1023       .output_stride(i)
1024       .block_width(5)
1025       .block_height(i)
1026       .element_size(8)
1027       .iterations(1)
1028       .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1029   }
1030 }
1031 
// Runs xnn_x64_transposec_ukernel__2x2_scalar_int with block_width 4 and
// input_stride 4; the loop variable supplies block_height and output_stride.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_3_4_bw_4)1032 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_3_4_bw_4){
1033   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
1034     TransposeMicrokernelTester()
1035       .input_stride(4)
1036       .output_stride(i)
1037       .block_width(4)
1038       .block_height(i)
1039       .element_size(8)
1040       .iterations(1)
1041       .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1042   }
1043 }
1044 
// Runs xnn_x64_transposec_ukernel__2x2_scalar_int with i as block_height and
// output_stride, and j as block_width and input_stride. Both loops have bounds
// 3 <= x < 4, so the tester runs once with i == 3 and j == 3.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_3_4_bw_3_4)1045 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_3_4_bw_3_4) {
1046   for(size_t i = 3; i < 4; ++i){
1047     for(size_t j = 3; j < 4; ++j){
1048       TransposeMicrokernelTester()
1049         .input_stride(j)
1050         .output_stride(i)
1051         .block_width(j)
1052         .block_height(i)
1053         .element_size(8)
1054         .iterations(1)
1055         .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1056     }
1057   }
1058 }
1059 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int on a 2x2 block with
// input_stride 4 (larger than block_width) and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_2_bw_2_is_4)1060 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2_is_4) {
1061   TransposeMicrokernelTester()
1062     .input_stride(4)
1063     .output_stride(2)
1064     .block_width(2)
1065     .block_height(2)
1066     .element_size(8)
1067     .iterations(1)
1068     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1069 }
1070 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int on a 2x2 block with
// input_stride 2 and output_stride 4 (larger than block_height).
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_2_bw_2_os_4)1071 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2_os_4) {
1072   TransposeMicrokernelTester()
1073     .input_stride(2)
1074     .output_stride(4)
1075     .block_width(2)
1076     .block_height(2)
1077     .element_size(8)
1078     .iterations(1)
1079     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1080 }
1081 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int on a 2x2 block with
// input_stride 4 and output_stride 4.
// NOTE(review): these setter values are the same as in bh_2_bw_2 of this suite.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_2_bw_2_is_4_os_4)1082 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2_is_4_os_4) {
1083   TransposeMicrokernelTester()
1084     .input_stride(4)
1085     .output_stride(4)
1086     .block_width(2)
1087     .block_height(2)
1088     .element_size(8)
1089     .iterations(1)
1090     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1091 }
1092 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int: block_width 38,
// block_height 34, input_stride 38, output_stride 34, input_element_stride 19.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_34_bw_38_ies_19)1093 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_34_bw_38_ies_19) {
1094   TransposeMicrokernelTester()
1095     .input_stride(38)
1096     .output_stride(34)
1097     .block_width(38)
1098     .block_height(34)
1099     .element_size(8)
1100     .input_element_stride(19)
1101     .iterations(1)
1102     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1103 }
1104 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int: block_width 10,
// block_height 6, input_stride 10, output_stride 6, output_element_stride 19.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_6_bw_10_oes_19)1105 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_6_bw_10_oes_19) {
1106   TransposeMicrokernelTester()
1107     .input_stride(10)
1108     .output_stride(6)
1109     .block_width(10)
1110     .block_height(6)
1111     .element_size(8)
1112     .output_element_stride(19)
1113     .iterations(1)
1114     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1115 }
1116 
// Single run of xnn_x64_transposec_ukernel__2x2_scalar_int with both element
// strides set (input 25, output 21): block_width 46, block_height 14,
// input_stride 51, output_stride 20.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8,bh_14_bw_46_ies_25_oes_21)1117 TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_14_bw_46_ies_25_oes_21) {
1118   TransposeMicrokernelTester()
1119     .input_stride(51)
1120     .output_stride(20)
1121     .block_width(46)
1122     .block_height(14)
1123     .element_size(8)
1124     .input_element_stride(25)
1125     .output_element_stride(21)
1126     .iterations(1)
1127     .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
1128 }
1129 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 1,
// block_height 4, input_stride 2, output_stride 8.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_4_bw_1)1130 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1) {
1131   TransposeMicrokernelTester()
1132     .input_stride(2)
1133     .output_stride(8)
1134     .block_width(1)
1135     .block_height(4)
1136     .element_size(8)
1137     .iterations(1)
1138     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1139 }
1140 
// Sweeps xnn_x64_transposec_ukernel__4x1_scalar_float over block_height
// i = 1..8 and block_width j = 1..2 (both inclusive), with input_stride 3*j and
// output_stride 7*i.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_1_8_bw_1_2)1141 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_1_8_bw_1_2) {
1142   for(size_t i = 1; i <= 8; ++i){
1143     for(size_t j = 1; j <= 2; ++j){
1144       TransposeMicrokernelTester()
1145         .input_stride(j * 3)
1146         .output_stride(i * 7)
1147         .block_width(j)
1148         .block_height(i)
1149         .element_size(8)
1150         .iterations(1)
1151         .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1152     }
1153   }
1154 }
1155 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 2,
// block_height 4, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_4_bw_2)1156 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_2) {
1157   TransposeMicrokernelTester()
1158     .input_stride(2)
1159     .output_stride(4)
1160     .block_width(2)
1161     .block_height(4)
1162     .element_size(8)
1163     .iterations(1)
1164     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1165 }
1166 
// Configures xnn_x64_transposec_ukernel__4x1_scalar_float with block_height 4
// and output_stride 8, taking block_width and input_stride from the loop.
// NOTE(review): the loop starts at 2 with condition i < 2, so its body never
// executes and this test runs no tester configuration — confirm whether the
// generator (tools/generate-transpose-test.py) intends this range to be empty.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_4_bw_2_2)1167 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_2_2) {
1168   for(size_t i = 2; i < 2; ++i){  // empty range: zero iterations
1169     TransposeMicrokernelTester()
1170       .input_stride(i)
1171       .output_stride(8)
1172       .block_width(i)
1173       .block_height(4)
1174       .element_size(8)
1175       .iterations(1)
1176       .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1177   }
1178 }
1179 
// Configures xnn_x64_transposec_ukernel__4x1_scalar_float with block_height 8
// and output_stride 8, taking block_width and input_stride from the loop.
// NOTE(review): the loop starts at 2 with condition i < 2, so its body never
// executes and this test runs no tester configuration — confirm whether the
// generator (tools/generate-transpose-test.py) intends this range to be empty.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_8_bw_2_2)1180 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_8_bw_2_2) {
1181   for(size_t i = 2; i < 2; ++i){  // empty range: zero iterations
1182     TransposeMicrokernelTester()
1183       .input_stride(i)
1184       .output_stride(8)
1185       .block_width(i)
1186       .block_height(8)
1187       .element_size(8)
1188       .iterations(1)
1189       .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1190   }
1191 }
1192 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 1,
// block_height 8, input_stride 1, output_stride 16.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_8_bw_1)1193 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_8_bw_1) {
1194   TransposeMicrokernelTester()
1195     .input_stride(1)
1196     .output_stride(16)
1197     .block_width(1)
1198     .block_height(8)
1199     .element_size(8)
1200     .iterations(1)
1201     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1202 }
1203 
// Runs xnn_x64_transposec_ukernel__4x1_scalar_float with the loop variable as
// block_height and output_stride, and a fixed input_stride of 18.
// NOTE(review): the test name says bw_1 but block_width is 4 — confirm against
// the generator template (tools/generate-transpose-test.py).
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_5_8_bw_1)1204 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_5_8_bw_1){
1205   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1206     TransposeMicrokernelTester()
1207       .input_stride(18)
1208       .output_stride(i)
1209       .block_width(4)
1210       .block_height(i)
1211       .element_size(8)
1212       .iterations(1)
1213       .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1214   }
1215 }
1216 
// Runs xnn_x64_transposec_ukernel__4x1_scalar_float with block_width 2 and
// input_stride 2; the loop variable supplies block_height and output_stride.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_5_8_bw_2)1217 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_5_8_bw_2){
1218   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1219     TransposeMicrokernelTester()
1220       .input_stride(2)
1221       .output_stride(i)
1222       .block_width(2)
1223       .block_height(i)
1224       .element_size(8)
1225       .iterations(1)
1226       .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1227   }
1228 }
1229 
// Configures xnn_x64_transposec_ukernel__4x1_scalar_float with i as
// block_height/output_stride and j as block_width/input_stride.
// NOTE(review): the inner loop starts at 2 with condition j < 2, so the tester
// is never run for any i — confirm whether the generator
// (tools/generate-transpose-test.py) intends this range to be empty.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_5_8_bw_2_2)1230 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_5_8_bw_2_2) {
1231   for(size_t i = 5; i < 8; ++i){
1232     for(size_t j = 2; j < 2; ++j){  // empty range: zero iterations
1233       TransposeMicrokernelTester()
1234         .input_stride(j)
1235         .output_stride(i)
1236         .block_width(j)
1237         .block_height(i)
1238         .element_size(8)
1239         .iterations(1)
1240         .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1241     }
1242   }
1243 }
1244 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 1,
// block_height 4, input_stride 2 (larger than block_width), output_stride 4.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_4_bw_1_is_2)1245 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1_is_2) {
1246   TransposeMicrokernelTester()
1247     .input_stride(2)
1248     .output_stride(4)
1249     .block_width(1)
1250     .block_height(4)
1251     .element_size(8)
1252     .iterations(1)
1253     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1254 }
1255 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 1,
// block_height 4, input_stride 1, output_stride 8 (larger than block_height).
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_4_bw_1_os_8)1256 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1_os_8) {
1257   TransposeMicrokernelTester()
1258     .input_stride(1)
1259     .output_stride(8)
1260     .block_width(1)
1261     .block_height(4)
1262     .element_size(8)
1263     .iterations(1)
1264     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1265 }
1266 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 1,
// block_height 4, input_stride 2, output_stride 8.
// NOTE(review): these setter values are the same as in bh_4_bw_1 of this suite.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_4_bw_1_is_2_os_8)1267 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1_is_2_os_8) {
1268   TransposeMicrokernelTester()
1269     .input_stride(2)
1270     .output_stride(8)
1271     .block_width(1)
1272     .block_height(4)
1273     .element_size(8)
1274     .iterations(1)
1275     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1276 }
1277 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 19,
// block_height 68, input_stride 19, output_stride 68, input_element_stride 19.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_68_bw_19_ies_19)1278 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_68_bw_19_ies_19) {
1279   TransposeMicrokernelTester()
1280     .input_stride(19)
1281     .output_stride(68)
1282     .block_width(19)
1283     .block_height(68)
1284     .element_size(8)
1285     .input_element_stride(19)
1286     .iterations(1)
1287     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1288 }
1289 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float: block_width 5,
// block_height 12, input_stride 5, output_stride 12, output_element_stride 19.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_12_bw_5_oes_19)1290 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_12_bw_5_oes_19) {
1291   TransposeMicrokernelTester()
1292     .input_stride(5)
1293     .output_stride(12)
1294     .block_width(5)
1295     .block_height(12)
1296     .element_size(8)
1297     .output_element_stride(19)
1298     .iterations(1)
1299     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1300 }
1301 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_float with both element
// strides set (input 25, output 21): block_width 23, block_height 28,
// input_stride 28, output_stride 34.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8,bh_28_bw_23_ies_25_oes_21)1302 TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_28_bw_23_ies_25_oes_21) {
1303   TransposeMicrokernelTester()
1304     .input_stride(28)
1305     .output_stride(34)
1306     .block_width(23)
1307     .block_height(28)
1308     .element_size(8)
1309     .input_element_stride(25)
1310     .output_element_stride(21)
1311     .iterations(1)
1312     .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
1313 }
1314 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 1,
// block_height 4, input_stride 2, output_stride 8.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_4_bw_1)1315 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1) {
1316   TransposeMicrokernelTester()
1317     .input_stride(2)
1318     .output_stride(8)
1319     .block_width(1)
1320     .block_height(4)
1321     .element_size(8)
1322     .iterations(1)
1323     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1324 }
1325 
// Sweeps xnn_x64_transposec_ukernel__4x1_scalar_int over block_height i = 1..8
// and block_width j = 1..2 (both inclusive), with input_stride 3*j and
// output_stride 7*i.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_1_8_bw_1_2)1326 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_1_8_bw_1_2) {
1327   for(size_t i = 1; i <= 8; ++i){
1328     for(size_t j = 1; j <= 2; ++j){
1329       TransposeMicrokernelTester()
1330         .input_stride(j * 3)
1331         .output_stride(i * 7)
1332         .block_width(j)
1333         .block_height(i)
1334         .element_size(8)
1335         .iterations(1)
1336         .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1337     }
1338   }
1339 }
1340 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 2,
// block_height 4, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_4_bw_2)1341 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_2) {
1342   TransposeMicrokernelTester()
1343     .input_stride(2)
1344     .output_stride(4)
1345     .block_width(2)
1346     .block_height(4)
1347     .element_size(8)
1348     .iterations(1)
1349     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1350 }
1351 
// Configures xnn_x64_transposec_ukernel__4x1_scalar_int with block_height 4
// and output_stride 8, taking block_width and input_stride from the loop.
// NOTE(review): the loop starts at 2 with condition i < 2, so its body never
// executes and this test runs no tester configuration — confirm whether the
// generator (tools/generate-transpose-test.py) intends this range to be empty.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_4_bw_2_2)1352 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_2_2) {
1353   for(size_t i = 2; i < 2; ++i){  // empty range: zero iterations
1354     TransposeMicrokernelTester()
1355       .input_stride(i)
1356       .output_stride(8)
1357       .block_width(i)
1358       .block_height(4)
1359       .element_size(8)
1360       .iterations(1)
1361       .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1362   }
1363 }
1364 
// Configures xnn_x64_transposec_ukernel__4x1_scalar_int with block_height 8
// and output_stride 8, taking block_width and input_stride from the loop.
// NOTE(review): the loop starts at 2 with condition i < 2, so its body never
// executes and this test runs no tester configuration — confirm whether the
// generator (tools/generate-transpose-test.py) intends this range to be empty.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_8_bw_2_2)1365 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_8_bw_2_2) {
1366   for(size_t i = 2; i < 2; ++i){  // empty range: zero iterations
1367     TransposeMicrokernelTester()
1368       .input_stride(i)
1369       .output_stride(8)
1370       .block_width(i)
1371       .block_height(8)
1372       .element_size(8)
1373       .iterations(1)
1374       .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1375   }
1376 }
1377 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 1,
// block_height 8, input_stride 1, output_stride 16.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_8_bw_1)1378 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_8_bw_1) {
1379   TransposeMicrokernelTester()
1380     .input_stride(1)
1381     .output_stride(16)
1382     .block_width(1)
1383     .block_height(8)
1384     .element_size(8)
1385     .iterations(1)
1386     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1387 }
1388 
// Runs xnn_x64_transposec_ukernel__4x1_scalar_int with the loop variable as
// block_height and output_stride, and a fixed input_stride of 18.
// NOTE(review): the test name says bw_1 but block_width is 4 — confirm against
// the generator template (tools/generate-transpose-test.py).
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_5_8_bw_1)1389 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_5_8_bw_1){
1390   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1391     TransposeMicrokernelTester()
1392       .input_stride(18)
1393       .output_stride(i)
1394       .block_width(4)
1395       .block_height(i)
1396       .element_size(8)
1397       .iterations(1)
1398       .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1399   }
1400 }
1401 
// Runs xnn_x64_transposec_ukernel__4x1_scalar_int with block_width 2 and
// input_stride 2; the loop variable supplies block_height and output_stride.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_5_8_bw_2)1402 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_5_8_bw_2){
1403   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1404     TransposeMicrokernelTester()
1405       .input_stride(2)
1406       .output_stride(i)
1407       .block_width(2)
1408       .block_height(i)
1409       .element_size(8)
1410       .iterations(1)
1411       .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1412   }
1413 }
1414 
// Configures xnn_x64_transposec_ukernel__4x1_scalar_int with i as
// block_height/output_stride and j as block_width/input_stride.
// NOTE(review): the inner loop starts at 2 with condition j < 2, so the tester
// is never run for any i — confirm whether the generator
// (tools/generate-transpose-test.py) intends this range to be empty.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_5_8_bw_2_2)1415 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_5_8_bw_2_2) {
1416   for(size_t i = 5; i < 8; ++i){
1417     for(size_t j = 2; j < 2; ++j){  // empty range: zero iterations
1418       TransposeMicrokernelTester()
1419         .input_stride(j)
1420         .output_stride(i)
1421         .block_width(j)
1422         .block_height(i)
1423         .element_size(8)
1424         .iterations(1)
1425         .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1426     }
1427   }
1428 }
1429 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 1,
// block_height 4, input_stride 2 (larger than block_width), output_stride 4.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_4_bw_1_is_2)1430 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1_is_2) {
1431   TransposeMicrokernelTester()
1432     .input_stride(2)
1433     .output_stride(4)
1434     .block_width(1)
1435     .block_height(4)
1436     .element_size(8)
1437     .iterations(1)
1438     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1439 }
1440 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 1,
// block_height 4, input_stride 1, output_stride 8 (larger than block_height).
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_4_bw_1_os_8)1441 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1_os_8) {
1442   TransposeMicrokernelTester()
1443     .input_stride(1)
1444     .output_stride(8)
1445     .block_width(1)
1446     .block_height(4)
1447     .element_size(8)
1448     .iterations(1)
1449     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1450 }
1451 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 1,
// block_height 4, input_stride 2, output_stride 8.
// NOTE(review): these setter values are the same as in bh_4_bw_1 of this suite.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_4_bw_1_is_2_os_8)1452 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1_is_2_os_8) {
1453   TransposeMicrokernelTester()
1454     .input_stride(2)
1455     .output_stride(8)
1456     .block_width(1)
1457     .block_height(4)
1458     .element_size(8)
1459     .iterations(1)
1460     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1461 }
1462 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 19,
// block_height 68, input_stride 19, output_stride 68, input_element_stride 19.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_68_bw_19_ies_19)1463 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_68_bw_19_ies_19) {
1464   TransposeMicrokernelTester()
1465     .input_stride(19)
1466     .output_stride(68)
1467     .block_width(19)
1468     .block_height(68)
1469     .element_size(8)
1470     .input_element_stride(19)
1471     .iterations(1)
1472     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1473 }
1474 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int: block_width 5,
// block_height 12, input_stride 5, output_stride 12, output_element_stride 19.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_12_bw_5_oes_19)1475 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_12_bw_5_oes_19) {
1476   TransposeMicrokernelTester()
1477     .input_stride(5)
1478     .output_stride(12)
1479     .block_width(5)
1480     .block_height(12)
1481     .element_size(8)
1482     .output_element_stride(19)
1483     .iterations(1)
1484     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1485 }
1486 
// Single run of xnn_x64_transposec_ukernel__4x1_scalar_int with both element
// strides set (input 25, output 21): block_width 23, block_height 28,
// input_stride 28, output_stride 34.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8,bh_28_bw_23_ies_25_oes_21)1487 TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_28_bw_23_ies_25_oes_21) {
1488   TransposeMicrokernelTester()
1489     .input_stride(28)
1490     .output_stride(34)
1491     .block_width(23)
1492     .block_height(28)
1493     .element_size(8)
1494     .input_element_stride(25)
1495     .output_element_stride(21)
1496     .iterations(1)
1497     .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
1498 }
1499 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 2,
// block_height 4, input_stride 4, output_stride 8.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_4_bw_2)1500 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2) {
1501   TransposeMicrokernelTester()
1502     .input_stride(4)
1503     .output_stride(8)
1504     .block_width(2)
1505     .block_height(4)
1506     .element_size(8)
1507     .iterations(1)
1508     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1509 }
1510 
// Sweeps xnn_x64_transposec_ukernel__4x2_scalar_float over block_height
// i = 1..8 and block_width j = 1..4 (both inclusive), with input_stride 3*j and
// output_stride 7*i.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_1_8_bw_1_4)1511 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_1_8_bw_1_4) {
1512   for(size_t i = 1; i <= 8; ++i){
1513     for(size_t j = 1; j <= 4; ++j){
1514       TransposeMicrokernelTester()
1515         .input_stride(j * 3)
1516         .output_stride(i * 7)
1517         .block_width(j)
1518         .block_height(i)
1519         .element_size(8)
1520         .iterations(1)
1521         .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1522     }
1523   }
1524 }
1525 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 4,
// block_height 4, input_stride 4, output_stride 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_4_bw_4)1526 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_4) {
1527   TransposeMicrokernelTester()
1528     .input_stride(4)
1529     .output_stride(4)
1530     .block_width(4)
1531     .block_height(4)
1532     .element_size(8)
1533     .iterations(1)
1534     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1535 }
1536 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_float with block_height 4 and
// output_stride 8; the loop variable supplies block_width and input_stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_4_bw_3_4)1537 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_3_4) {
1538   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
1539     TransposeMicrokernelTester()
1540       .input_stride(i)
1541       .output_stride(8)
1542       .block_width(i)
1543       .block_height(4)
1544       .element_size(8)
1545       .iterations(1)
1546       .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1547   }
1548 }
1549 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_float with block_height 8 and
// output_stride 8; the loop variable supplies block_width and input_stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_8_bw_3_4)1550 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_8_bw_3_4) {
1551   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
1552     TransposeMicrokernelTester()
1553       .input_stride(i)
1554       .output_stride(8)
1555       .block_width(i)
1556       .block_height(8)
1557       .element_size(8)
1558       .iterations(1)
1559       .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1560   }
1561 }
1562 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 2,
// block_height 8, input_stride 2, output_stride 16.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_8_bw_2)1563 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_8_bw_2) {
1564   TransposeMicrokernelTester()
1565     .input_stride(2)
1566     .output_stride(16)
1567     .block_width(2)
1568     .block_height(8)
1569     .element_size(8)
1570     .iterations(1)
1571     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1572 }
1573 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_float with the loop variable as
// block_height and output_stride, and a fixed input_stride of 19.
// NOTE(review): the test name says bw_2 but block_width is 5 — confirm against
// the generator template (tools/generate-transpose-test.py).
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_5_8_bw_2)1574 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_5_8_bw_2){
1575   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1576     TransposeMicrokernelTester()
1577       .input_stride(19)
1578       .output_stride(i)
1579       .block_width(5)
1580       .block_height(i)
1581       .element_size(8)
1582       .iterations(1)
1583       .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1584   }
1585 }
1586 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_float with block_width 4 and
// input_stride 4; the loop variable supplies block_height and output_stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_5_8_bw_4)1587 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_5_8_bw_4){
1588   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1589     TransposeMicrokernelTester()
1590       .input_stride(4)
1591       .output_stride(i)
1592       .block_width(4)
1593       .block_height(i)
1594       .element_size(8)
1595       .iterations(1)
1596       .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1597   }
1598 }
1599 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_float with i as block_height and
// output_stride (i = 5, 6, 7) and j as block_width and input_stride. The inner
// bounds 3 <= j < 4 give only j == 3.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_5_8_bw_3_4)1600 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_5_8_bw_3_4) {
1601   for(size_t i = 5; i < 8; ++i){
1602     for(size_t j = 3; j < 4; ++j){
1603       TransposeMicrokernelTester()
1604         .input_stride(j)
1605         .output_stride(i)
1606         .block_width(j)
1607         .block_height(i)
1608         .element_size(8)
1609         .iterations(1)
1610         .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1611     }
1612   }
1613 }
1614 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 2,
// block_height 4, input_stride 4 (larger than block_width), output_stride 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_4_bw_2_is_4)1615 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2_is_4) {
1616   TransposeMicrokernelTester()
1617     .input_stride(4)
1618     .output_stride(4)
1619     .block_width(2)
1620     .block_height(4)
1621     .element_size(8)
1622     .iterations(1)
1623     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1624 }
1625 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 2,
// block_height 4, input_stride 2, output_stride 8 (larger than block_height).
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_4_bw_2_os_8)1626 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2_os_8) {
1627   TransposeMicrokernelTester()
1628     .input_stride(2)
1629     .output_stride(8)
1630     .block_width(2)
1631     .block_height(4)
1632     .element_size(8)
1633     .iterations(1)
1634     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1635 }
1636 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 2,
// block_height 4, input_stride 4, output_stride 8.
// NOTE(review): these setter values are the same as in bh_4_bw_2 of this suite.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_4_bw_2_is_4_os_8)1637 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2_is_4_os_8) {
1638   TransposeMicrokernelTester()
1639     .input_stride(4)
1640     .output_stride(8)
1641     .block_width(2)
1642     .block_height(4)
1643     .element_size(8)
1644     .iterations(1)
1645     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1646 }
1647 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 38,
// block_height 68, input_stride 38, output_stride 68, input_element_stride 19.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_68_bw_38_ies_19)1648 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_68_bw_38_ies_19) {
1649   TransposeMicrokernelTester()
1650     .input_stride(38)
1651     .output_stride(68)
1652     .block_width(38)
1653     .block_height(68)
1654     .element_size(8)
1655     .input_element_stride(19)
1656     .iterations(1)
1657     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1658 }
1659 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float: block_width 10,
// block_height 12, input_stride 10, output_stride 12, output_element_stride 19.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_12_bw_10_oes_19)1660 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_12_bw_10_oes_19) {
1661   TransposeMicrokernelTester()
1662     .input_stride(10)
1663     .output_stride(12)
1664     .block_width(10)
1665     .block_height(12)
1666     .element_size(8)
1667     .output_element_stride(19)
1668     .iterations(1)
1669     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1670 }
1671 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_float with both element
// strides set (input 25, output 21): block_width 46, block_height 28,
// input_stride 51, output_stride 34.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8,bh_28_bw_46_ies_25_oes_21)1672 TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_28_bw_46_ies_25_oes_21) {
1673   TransposeMicrokernelTester()
1674     .input_stride(51)
1675     .output_stride(34)
1676     .block_width(46)
1677     .block_height(28)
1678     .element_size(8)
1679     .input_element_stride(25)
1680     .output_element_stride(21)
1681     .iterations(1)
1682     .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
1683 }
1684 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 2,
// block_height 4, input_stride 4, output_stride 8.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_4_bw_2)1685 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2) {
1686   TransposeMicrokernelTester()
1687     .input_stride(4)
1688     .output_stride(8)
1689     .block_width(2)
1690     .block_height(4)
1691     .element_size(8)
1692     .iterations(1)
1693     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1694 }
1695 
// Sweeps xnn_x64_transposec_ukernel__4x2_scalar_int over block_height i = 1..8
// and block_width j = 1..4 (both inclusive), with input_stride 3*j and
// output_stride 7*i.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_1_8_bw_1_4)1696 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_1_8_bw_1_4) {
1697   for(size_t i = 1; i <= 8; ++i){
1698     for(size_t j = 1; j <= 4; ++j){
1699       TransposeMicrokernelTester()
1700         .input_stride(j * 3)
1701         .output_stride(i * 7)
1702         .block_width(j)
1703         .block_height(i)
1704         .element_size(8)
1705         .iterations(1)
1706         .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1707     }
1708   }
1709 }
1710 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 4,
// block_height 4, input_stride 4, output_stride 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_4_bw_4)1711 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_4) {
1712   TransposeMicrokernelTester()
1713     .input_stride(4)
1714     .output_stride(4)
1715     .block_width(4)
1716     .block_height(4)
1717     .element_size(8)
1718     .iterations(1)
1719     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1720 }
1721 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_int with block_height 4 and
// output_stride 8; the loop variable supplies block_width and input_stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_4_bw_3_4)1722 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_3_4) {
1723   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
1724     TransposeMicrokernelTester()
1725       .input_stride(i)
1726       .output_stride(8)
1727       .block_width(i)
1728       .block_height(4)
1729       .element_size(8)
1730       .iterations(1)
1731       .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1732   }
1733 }
1734 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_int with block_height 8 and
// output_stride 8; the loop variable supplies block_width and input_stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_8_bw_3_4)1735 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_8_bw_3_4) {
1736   for(size_t i = 3; i < 4; ++i){  // bounds 3 <= i < 4: one iteration, i == 3
1737     TransposeMicrokernelTester()
1738       .input_stride(i)
1739       .output_stride(8)
1740       .block_width(i)
1741       .block_height(8)
1742       .element_size(8)
1743       .iterations(1)
1744       .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1745   }
1746 }
1747 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 2,
// block_height 8, input_stride 2, output_stride 16.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_8_bw_2)1748 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_8_bw_2) {
1749   TransposeMicrokernelTester()
1750     .input_stride(2)
1751     .output_stride(16)
1752     .block_width(2)
1753     .block_height(8)
1754     .element_size(8)
1755     .iterations(1)
1756     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1757 }
1758 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_int with the loop variable as
// block_height and output_stride, and a fixed input_stride of 19.
// NOTE(review): the test name says bw_2 but block_width is 5 — confirm against
// the generator template (tools/generate-transpose-test.py).
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_5_8_bw_2)1759 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_5_8_bw_2){
1760   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1761     TransposeMicrokernelTester()
1762       .input_stride(19)
1763       .output_stride(i)
1764       .block_width(5)
1765       .block_height(i)
1766       .element_size(8)
1767       .iterations(1)
1768       .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1769   }
1770 }
1771 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_int with block_width 4 and
// input_stride 4; the loop variable supplies block_height and output_stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_5_8_bw_4)1772 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_5_8_bw_4){
1773   for(size_t i = 5; i < 8; ++i){  // i takes 5, 6, 7 (8 is excluded)
1774     TransposeMicrokernelTester()
1775       .input_stride(4)
1776       .output_stride(i)
1777       .block_width(4)
1778       .block_height(i)
1779       .element_size(8)
1780       .iterations(1)
1781       .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1782   }
1783 }
1784 
// Runs xnn_x64_transposec_ukernel__4x2_scalar_int with i as block_height and
// output_stride (i = 5, 6, 7) and j as block_width and input_stride. The inner
// bounds 3 <= j < 4 give only j == 3.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_5_8_bw_3_4)1785 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_5_8_bw_3_4) {
1786   for(size_t i = 5; i < 8; ++i){
1787     for(size_t j = 3; j < 4; ++j){
1788       TransposeMicrokernelTester()
1789         .input_stride(j)
1790         .output_stride(i)
1791         .block_width(j)
1792         .block_height(i)
1793         .element_size(8)
1794         .iterations(1)
1795         .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1796     }
1797   }
1798 }
1799 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 2,
// block_height 4, input_stride 4 (larger than block_width), output_stride 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_4_bw_2_is_4)1800 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4) {
1801   TransposeMicrokernelTester()
1802     .input_stride(4)
1803     .output_stride(4)
1804     .block_width(2)
1805     .block_height(4)
1806     .element_size(8)
1807     .iterations(1)
1808     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1809 }
1810 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 2,
// block_height 4, input_stride 2, output_stride 8 (larger than block_height).
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_4_bw_2_os_8)1811 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_os_8) {
1812   TransposeMicrokernelTester()
1813     .input_stride(2)
1814     .output_stride(8)
1815     .block_width(2)
1816     .block_height(4)
1817     .element_size(8)
1818     .iterations(1)
1819     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1820 }
1821 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 2,
// block_height 4, input_stride 4, output_stride 8.
// NOTE(review): these setter values are the same as in bh_4_bw_2 of this suite.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_4_bw_2_is_4_os_8)1822 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
1823   TransposeMicrokernelTester()
1824     .input_stride(4)
1825     .output_stride(8)
1826     .block_width(2)
1827     .block_height(4)
1828     .element_size(8)
1829     .iterations(1)
1830     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1831 }
1832 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 38,
// block_height 68, input_stride 38, output_stride 68, input_element_stride 19.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_68_bw_38_ies_19)1833 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_68_bw_38_ies_19) {
1834   TransposeMicrokernelTester()
1835     .input_stride(38)
1836     .output_stride(68)
1837     .block_width(38)
1838     .block_height(68)
1839     .element_size(8)
1840     .input_element_stride(19)
1841     .iterations(1)
1842     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1843 }
1844 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int: block_width 10,
// block_height 12, input_stride 10, output_stride 12, output_element_stride 19.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_12_bw_10_oes_19)1845 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_12_bw_10_oes_19) {
1846   TransposeMicrokernelTester()
1847     .input_stride(10)
1848     .output_stride(12)
1849     .block_width(10)
1850     .block_height(12)
1851     .element_size(8)
1852     .output_element_stride(19)
1853     .iterations(1)
1854     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1855 }
1856 
// Single run of xnn_x64_transposec_ukernel__4x2_scalar_int with both element
// strides set (input 25, output 21): block_width 46, block_height 28,
// input_stride 51, output_stride 34.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8,bh_28_bw_46_ies_25_oes_21)1857 TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_28_bw_46_ies_25_oes_21) {
1858   TransposeMicrokernelTester()
1859     .input_stride(51)
1860     .output_stride(34)
1861     .block_width(46)
1862     .block_height(28)
1863     .element_size(8)
1864     .input_element_stride(25)
1865     .output_element_stride(21)
1866     .iterations(1)
1867     .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
1868 }
1869 
1870 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of xnn_x64_transposec_ukernel__2x2_multi_mov_sse2: block_width 2,
// block_height 2, input_stride 4, output_stride 4.
// TEST_REQUIRES_X86_SSE2 presumably skips the test when SSE2 is unavailable —
// see xnnpack/isa-checks.h to confirm.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8,bh_2_bw_2)1871   TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2) {
1872     TEST_REQUIRES_X86_SSE2;
1873     TransposeMicrokernelTester()
1874       .input_stride(4)
1875       .output_stride(4)
1876       .block_width(2)
1877       .block_height(2)
1878       .element_size(8)
1879       .iterations(1)
1880       .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
1881   }
1882 
// Sweeps xnn_x64_transposec_ukernel__2x2_multi_mov_sse2 over block_height
// i = 1..4 and block_width j = 1..4 (both inclusive), with input_stride 3*j and
// output_stride 7*i.
// TEST_REQUIRES_X86_SSE2 presumably skips the test when SSE2 is unavailable —
// see xnnpack/isa-checks.h to confirm.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8,bh_1_4_bw_1_4)1883   TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_1_4_bw_1_4) {
1884     TEST_REQUIRES_X86_SSE2;
1885     for(size_t i = 1; i <= 4; ++i){
1886       for(size_t j = 1; j <= 4; ++j){
1887         TransposeMicrokernelTester()
1888           .input_stride(j * 3)
1889           .output_stride(i * 7)
1890           .block_width(j)
1891           .block_height(i)
1892           .element_size(8)
1893           .iterations(1)
1894           .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
1895       }
1896     }
1897   }
1898 
// block_height 2, block_width 4; input_stride 4 and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
1910 
// block_height 2; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1924 
// block_height 4; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1938 
// block_height 4, block_width 2; input_stride 2 and output_stride 10.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
1950 
// block_height (also used as output_stride) runs over [3, 4), i.e. only 3.
// NOTE(review): the case name says bw_2, but block_width is 5 with
// input_stride 19 -- confirm this matches the generator's intent.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(bh)
        .block_width(5).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1964 
// block_width 4 (input_stride 4); block_height (also used as output_stride)
// runs over the half-open range [3, 4), so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1978 
// Both block_height and block_width run over [3, 4) (only 3x3 is exercised);
// the strides are set equal to the matching block dimension.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
    }
  }
}
1994 
// 2x2 block with input_stride 4 (wider than the block) and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2006 
// 2x2 block with input_stride 2 and output_stride 4 (larger than the block).
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2018 
// 2x2 block with both input_stride and output_stride set to 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2030 
// block_height 34, block_width 38, with input_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2043 
// block_height 6, block_width 10, with output_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2056 
// block_height 14, block_width 46, with both element strides set (input 25,
// output 21); input_stride 51 and output_stride 20 exceed the block dims.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2070 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2071 
2072 
2073 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 2x2 block; input_stride and output_stride are both 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2085 
// Sweeps every block_height and block_width in [1, 4]; input_stride is three
// times the width and output_stride is seven times the height.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
    }
  }
}
2101 
// block_height 2, block_width 4; input_stride 4 and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2113 
// block_height 2; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2127 
// block_height 4; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2141 
// block_height 4, block_width 2; input_stride 2 and output_stride 10.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2153 
// block_height (also used as output_stride) runs over [3, 4), i.e. only 3.
// NOTE(review): the case name says bw_2, but block_width is 5 with
// input_stride 19 -- confirm this matches the generator's intent.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(bh)
        .block_width(5).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2167 
// block_width 4 (input_stride 4); block_height (also used as output_stride)
// runs over the half-open range [3, 4), so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2181 
// Both block_height and block_width run over [3, 4) (only 3x3 is exercised);
// the strides are set equal to the matching block dimension.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
    }
  }
}
2197 
// 2x2 block with input_stride 4 (wider than the block) and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2209 
// 2x2 block with input_stride 2 and output_stride 4 (larger than the block).
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2221 
// 2x2 block with both input_stride and output_stride set to 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2233 
// block_height 34, block_width 38, with input_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2246 
// block_height 6, block_width 10, with output_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2259 
// block_height 14, block_width 46, with both element strides set (input 25,
// output 21); input_stride 51 and output_stride 20 exceed the block dims.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2273 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2274 
2275 
2276 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 2x2 block; input_stride and output_stride are both 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2288 
// Sweeps every block_height and block_width in [1, 4]; input_stride is three
// times the width and output_stride is seven times the height.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
    }
  }
}
2304 
// block_height 2, block_width 4; input_stride 4 and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2316 
// block_height 2; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2330 
// block_height 4; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2344 
// block_height 4, block_width 2; input_stride 2 and output_stride 10.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2356 
// block_height (also used as output_stride) runs over [3, 4), i.e. only 3.
// NOTE(review): the case name says bw_2, but block_width is 5 with
// input_stride 19 -- confirm this matches the generator's intent.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(bh)
        .block_width(5).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2370 
// block_width 4 (input_stride 4); block_height (also used as output_stride)
// runs over the half-open range [3, 4), so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2384 
// Both block_height and block_width run over [3, 4) (only 3x3 is exercised);
// the strides are set equal to the matching block dimension.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
    }
  }
}
2400 
// 2x2 block with input_stride 4 (wider than the block) and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2412 
// 2x2 block with input_stride 2 and output_stride 4 (larger than the block).
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2424 
// 2x2 block with both input_stride and output_stride set to 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2436 
// block_height 34, block_width 38, with input_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2449 
// block_height 6, block_width 10, with output_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2462 
// block_height 14, block_width 46, with both element strides set (input 25,
// output 21); input_stride 51 and output_stride 20 exceed the block dims.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2476 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2477 
2478 
2479 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 2x2 block; input_stride and output_stride are both 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2491 
// Sweeps every block_height and block_width in [1, 4]; input_stride is three
// times the width and output_stride is seven times the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
    }
  }
}
2507 
// block_height 2, block_width 4; input_stride 4 and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2519 
// block_height 2; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2533 
// block_height 4; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2547 
// block_height 4, block_width 2; input_stride 2 and output_stride 10.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2559 
// block_height (also used as output_stride) runs over [3, 4), i.e. only 3.
// NOTE(review): the case name says bw_2, but block_width is 5 with
// input_stride 19 -- confirm this matches the generator's intent.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(19).output_stride(bh)
        .block_width(5).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2573 
// block_width 4 (input_stride 4); block_height (also used as output_stride)
// runs over the half-open range [3, 4), so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2587 
// Both block_height and block_width run over [3, 4) (only 3x3 is exercised);
// the strides are set equal to the matching block dimension.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
    }
  }
}
2603 
// 2x2 block with input_stride 4 (wider than the block) and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2615 
// 2x2 block with input_stride 2 and output_stride 4 (larger than the block).
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2627 
// 2x2 block with both input_stride and output_stride set to 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2639 
// block_height 34, block_width 38, with input_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(38).output_stride(34)
      .block_width(38).block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2652 
// block_height 6, block_width 10, with output_element_stride 19; the input
// and output strides equal the block width and height respectively.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(10).output_stride(6)
      .block_width(10).block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2665 
// block_height 14, block_width 46, with both element strides set (input 25,
// output 21); input_stride 51 and output_stride 20 exceed the block dims.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(51).output_stride(20)
      .block_width(46).block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2679 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2680 
2681 
2682 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 2x2 block; input_stride and output_stride are both 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(2).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2694 
// Sweeps every block_height and block_width in [1, 4]; input_stride is three
// times the width and output_stride is seven times the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3).output_stride(bh * 7)
          .block_width(bw).block_height(bh)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
    }
  }
}
2710 
// block_height 2, block_width 4; input_stride 4 and output_stride 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(2)
      .block_width(4).block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2722 
// block_height 2; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2736 
// block_height 4; block_width (also used as input_stride) runs over the
// half-open range [3, 4), so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 3; bw < 4; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2750 
// block_height 4, block_width 2; input_stride 2 and output_stride 10.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2).output_stride(10)
      .block_width(2).block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2762 
// Block height swept over [3, 4) (only height 3 runs); the output stride
// tracks the height. Note that the block width passed here is 5 with an input
// stride of 19, regardless of the "bw_2" suffix in the test name.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2776 
// Block width 4 with the block height swept over [3, 4); the exclusive upper
// bound means only height 3 is exercised. The output stride tracks the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2790 
// Block height and width are each swept over [3, 4), so a single 3x3 case
// runs. Input stride equals the width and output stride equals the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
    }
  }
}
2806 
// 2x2 block with an input stride (4) larger than the block width; the output
// stride is 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2818 
// 2x2 block with an output stride (4) larger than the block height; the input
// stride is 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2830 
// 2x2 block where both the input stride and the output stride are 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2842 
// Larger block (width 38, height 34) with an explicit input element stride of
// 19 while the element size stays 8.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2855 
// Block of width 10 and height 6 with an explicit output element stride of 19
// while the element size stays 8.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2868 
// Block of width 46 and height 14 with both element strides overridden
// (input 25, output 21); input stride 51 and output stride 20 exceed the
// block dimensions.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2882 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2883 
2884 
2885 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline 2x2 block with input and output strides of 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
2897 
// Sweeps every height/width combination in [1, 4] x [1, 4] (inclusive).
// Strides are scaled from the dimensions: input stride is 3x the width and
// output stride is 7x the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
    }
  }
}
2913 
// Fixed shape: block width 4, block height 2, input stride 4, output stride 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
2925 
// Block height 2 with the block width swept over [3, 4); the exclusive upper
// bound means only width 3 is exercised. The input stride tracks the width.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2939 
// Block height 4 with the block width swept over [3, 4); the exclusive upper
// bound means only width 3 is exercised. The input stride tracks the width.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2953 
// Fixed shape: block width 2, block height 4, input stride 2, output stride 10.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
2965 
// Block height swept over [3, 4) (only height 3 runs); the output stride
// tracks the height. Note that the block width passed here is 5 with an input
// stride of 19, regardless of the "bw_2" suffix in the test name.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2979 
// Block width 4 with the block height swept over [3, 4); the exclusive upper
// bound means only height 3 is exercised. The output stride tracks the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2993 
// Block height and width are each swept over [3, 4), so a single 3x3 case
// runs. Input stride equals the width and output stride equals the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
    }
  }
}
3009 
// 2x2 block with an input stride (4) larger than the block width; the output
// stride is 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3021 
// 2x2 block with an output stride (4) larger than the block height; the input
// stride is 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3033 
// 2x2 block where both the input stride and the output stride are 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3045 
// Larger block (width 38, height 34) with an explicit input element stride of
// 19 while the element size stays 8.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3058 
// Block of width 10 and height 6 with an explicit output element stride of 19
// while the element size stays 8.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3071 
// Block of width 46 and height 14 with both element strides overridden
// (input 25, output 21); input stride 51 and output stride 20 exceed the
// block dimensions.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3085 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3086