// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/x64-transpose.yaml
// Generator: tools/generate-transpose-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18
19
// Single run of the 1x2 scalar-float kernel: block_width 2, block_height 1,
// input_stride 4, output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
30
// Sweeps block_height over [1, 2] and block_width over [1, 4] (both
// inclusive); input_stride is 3x the width and output_stride 7x the height.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
    }
  }
}
45
// Single run of the 1x2 scalar-float kernel: block_width 4, block_height 1,
// input_stride 4, output_stride 1, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(1)
      .block_width(4)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
56
// block_height 1 with block_width == input_stride swept over [3, 4); the
// upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(1)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
69
// block_height 2 with block_width == input_stride swept over [3, 4); the
// upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
82
// Single run of the 1x2 scalar-float kernel: block_width 2, block_height 2,
// input_stride 2, output_stride 7, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(7)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
93
// Runs the 1x2 scalar-float kernel with block_width 5 and input_stride 19
// while block_height == output_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_2_bw_1_4 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
// NOTE(review): the test name says bw_2 but block_width is 5; kept as generated.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
106
// Runs the 1x2 scalar-float kernel with block_width 4 and input_stride 4
// while block_height == output_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_2_bw_1_4 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
  }
}
119
// Runs the 1x2 scalar-float kernel with block_height == output_stride over
// the inclusive range [2, 2] and block_width == input_stride over [3, 4).
//
// The generated outer bound was `i < 2`, so nothing ran and the test passed
// without invoking the kernel. Only that empty bound is made inclusive; the
// width loop keeps its generated exclusive bound. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
    }
  }
}
134
// Single run with input_stride 4 larger than block_width 2: block_height 1,
// output_stride 1, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(1)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
145
// Single run with output_stride 2 larger than block_height 1: block_width 2,
// input_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
156
// Single run with input_stride 4 and output_stride 2: block_width 2,
// block_height 1, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
167
// Single run with input_element_stride 19: block_width 38, block_height 17,
// input_stride 38, output_stride 17, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_17_bw_38_ies_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38)
      .output_stride(17)
      .block_width(38)
      .block_height(17)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
179
// Single run with output_element_stride 19: block_width 10, block_height 3,
// input_stride 10, output_stride 3, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_3_bw_10_oes_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10)
      .output_stride(3)
      .block_width(10)
      .block_height(3)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
191
// Single run with input_element_stride 25 and output_element_stride 21:
// block_width 46, block_height 7, input_stride 51, output_stride 13,
// element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_FLOAT_8, bh_7_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51)
      .output_stride(13)
      .block_width(46)
      .block_height(7)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_float);
}
204
// Single run of the 1x2 scalar-int kernel: block_width 2, block_height 1,
// input_stride 4, output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
215
// Sweeps block_height over [1, 2] and block_width over [1, 4] (both
// inclusive); input_stride is 3x the width and output_stride 7x the height.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
    }
  }
}
230
// Single run of the 1x2 scalar-int kernel: block_width 4, block_height 1,
// input_stride 4, output_stride 1, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(1)
      .block_width(4)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
241
// block_height 1 with block_width == input_stride swept over [3, 4); the
// upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(1)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
254
// block_height 2 with block_width == input_stride swept over [3, 4); the
// upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(2)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
267
// Single run of the 1x2 scalar-int kernel: block_width 2, block_height 2,
// input_stride 2, output_stride 7, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(7)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
278
// Runs the 1x2 scalar-int kernel with block_width 5 and input_stride 19
// while block_height == output_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_2_bw_1_4 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
// NOTE(review): the test name says bw_2 but block_width is 5; kept as generated.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
291
// Runs the 1x2 scalar-int kernel with block_width 4 and input_stride 4
// while block_height == output_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_2_bw_1_4 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
  }
}
304
// Runs the 1x2 scalar-int kernel with block_height == output_stride over the
// inclusive range [2, 2] and block_width == input_stride over [3, 4).
//
// The generated outer bound was `i < 2`, so nothing ran and the test passed
// without invoking the kernel. Only that empty bound is made inclusive; the
// width loop keeps its generated exclusive bound. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
    }
  }
}
319
// Single run with input_stride 4 larger than block_width 2: block_height 1,
// output_stride 1, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(1)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
330
// Single run with output_stride 2 larger than block_height 1: block_width 2,
// input_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
341
// Single run with input_stride 4 and output_stride 2: block_width 2,
// block_height 1, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(1)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
352
// Single run with input_element_stride 19: block_width 38, block_height 17,
// input_stride 38, output_stride 17, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_17_bw_38_ies_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38)
      .output_stride(17)
      .block_width(38)
      .block_height(17)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
364
// Single run with output_element_stride 19: block_width 10, block_height 3,
// input_stride 10, output_stride 3, element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_3_bw_10_oes_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10)
      .output_stride(3)
      .block_width(10)
      .block_height(3)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
376
// Single run with input_element_stride 25 and output_element_stride 21:
// block_width 46, block_height 7, input_stride 51, output_stride 13,
// element_size 8.
TEST(X64_TRANSPOSEC__1X2_SCALAR_INT_8, bh_7_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51)
      .output_stride(13)
      .block_width(46)
      .block_height(7)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__1x2_scalar_int);
}
389
// Single run of the 2x1 scalar-float kernel: block_width 1, block_height 2,
// input_stride 2, output_stride 4, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
400
// Sweeps block_height over [1, 4] and block_width over [1, 2] (both
// inclusive); input_stride is 3x the width and output_stride 7x the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
    }
  }
}
415
// Single run of the 2x1 scalar-float kernel: block_width 2, block_height 2,
// input_stride 2, output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
426
// Runs the 2x1 scalar-float kernel with block_height 2 and output_stride 4
// while block_width == input_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_4_bw_1_2 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
439
// Runs the 2x1 scalar-float kernel with block_height 4 and output_stride 4
// while block_width == input_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_4_bw_1_2 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
452
// Single run of the 2x1 scalar-float kernel: block_width 1, block_height 4,
// input_stride 1, output_stride 10, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_4_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(10)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
463
// block_width 4 and input_stride 18 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
// NOTE(review): the test name says bw_1 but block_width is 4; kept as generated.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
476
// block_width 2 and input_stride 2 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
  }
}
489
// Runs the 2x1 scalar-float kernel with block_height == output_stride over
// [3, 4) and block_width == input_stride over the inclusive range [2, 2].
//
// The generated inner bound was `j < 2`, so nothing ran and the test passed
// without invoking the kernel. Only that empty bound is made inclusive; the
// height loop keeps its generated exclusive bound. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
    }
  }
}
504
// Single run with input_stride 2 larger than block_width 1: block_height 2,
// output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
515
// Single run with output_stride 4 larger than block_height 2: block_width 1,
// input_stride 1, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
526
// Single run with input_stride 2 and output_stride 4: block_width 1,
// block_height 2, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
537
// Single run with input_element_stride 19: block_width 19, block_height 34,
// input_stride 19, output_stride 34, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_34_bw_19_ies_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(19)
      .output_stride(34)
      .block_width(19)
      .block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
549
// Single run with output_element_stride 19: block_width 5, block_height 6,
// input_stride 5, output_stride 6, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_6_bw_5_oes_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(5)
      .output_stride(6)
      .block_width(5)
      .block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
561
// Single run with input_element_stride 25 and output_element_stride 21:
// block_width 23, block_height 14, input_stride 28, output_stride 20,
// element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_FLOAT_8, bh_14_bw_23_ies_25_oes_21) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(28)
      .output_stride(20)
      .block_width(23)
      .block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_float);
}
574
// Single run of the 2x1 scalar-int kernel: block_width 1, block_height 2,
// input_stride 2, output_stride 4, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
585
// Sweeps block_height over [1, 4] and block_width over [1, 2] (both
// inclusive); input_stride is 3x the width and output_stride 7x the height.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
    }
  }
}
600
// Single run of the 2x1 scalar-int kernel: block_width 2, block_height 2,
// input_stride 2, output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
611
// Runs the 2x1 scalar-int kernel with block_height 2 and output_stride 4
// while block_width == input_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_4_bw_1_2 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
624
// Runs the 2x1 scalar-int kernel with block_height 4 and output_stride 4
// while block_width == input_stride covers the inclusive range [2, 2].
//
// The generated bound was `i < 2`, so the body never ran and the test passed
// without invoking the kernel. The bound is inclusive here, like
// bh_1_4_bw_1_2 in this suite. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
637
// Single run of the 2x1 scalar-int kernel: block_width 1, block_height 4,
// input_stride 1, output_stride 10, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_4_bw_1) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(10)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
648
// block_width 4 and input_stride 18 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
// NOTE(review): the test name says bw_1 but block_width is 4; kept as generated.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
661
// block_width 2 and input_stride 2 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
  }
}
674
// Runs the 2x1 scalar-int kernel with block_height == output_stride over
// [3, 4) and block_width == input_stride over the inclusive range [2, 2].
//
// The generated inner bound was `j < 2`, so nothing ran and the test passed
// without invoking the kernel. Only that empty bound is made inclusive; the
// height loop keeps its generated exclusive bound. This file is generated, so
// tools/generate-transpose-test.py needs the same change to keep the fix.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
    }
  }
}
689
// Single run with input_stride 2 larger than block_width 1: block_height 2,
// output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(2)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
700
// Single run with output_stride 4 larger than block_height 2: block_width 1,
// input_stride 1, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(1)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
711
// Single run with input_stride 2 and output_stride 4: block_width 1,
// block_height 2, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
722
// Single run with input_element_stride 19: block_width 19, block_height 34,
// input_stride 19, output_stride 34, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_34_bw_19_ies_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(19)
      .output_stride(34)
      .block_width(19)
      .block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
734
// Single run with output_element_stride 19: block_width 5, block_height 6,
// input_stride 5, output_stride 6, element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_6_bw_5_oes_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(5)
      .output_stride(6)
      .block_width(5)
      .block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
746
// Single run with input_element_stride 25 and output_element_stride 21:
// block_width 23, block_height 14, input_stride 28, output_stride 20,
// element_size 8.
TEST(X64_TRANSPOSEC__2X1_SCALAR_INT_8, bh_14_bw_23_ies_25_oes_21) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(28)
      .output_stride(20)
      .block_width(23)
      .block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x1_scalar_int);
}
759
// Single run of the 2x2 scalar-float kernel: block_width 2, block_height 2,
// input_stride 4, output_stride 4, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
770
// Sweeps block_height over [1, 4] and block_width over [1, 4] (both
// inclusive); input_stride is 3x the width and output_stride 7x the height.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
    }
  }
}
785
// Single run of the 2x2 scalar-float kernel: block_width 4, block_height 2,
// input_stride 4, output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
796
// block_height 2 and output_stride 4 with block_width == input_stride swept
// over [3, 4); the upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
809
// block_height 4 and output_stride 4 with block_width == input_stride swept
// over [3, 4); the upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
822
// Single run of the 2x2 scalar-float kernel: block_width 2, block_height 4,
// input_stride 2, output_stride 10, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
833
// block_width 5 and input_stride 19 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
// NOTE(review): the test name says bw_2 but block_width is 5; kept as generated.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
846
// block_width 4 and input_stride 4 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
  }
}
859
// block_height == output_stride and block_width == input_stride are each
// swept over [3, 4); both bounds are exclusive, so only 3x3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
    }
  }
}
874
// Single run with input_stride 4 larger than block_width 2: block_height 2,
// output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
885
// Single run with output_stride 4 larger than block_height 2: block_width 2,
// input_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
896
// Single run with input_stride 4 and output_stride 4: block_width 2,
// block_height 2, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
907
// Single run with input_element_stride 19: block_width 38, block_height 34,
// input_stride 38, output_stride 34, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_34_bw_38_ies_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
919
// Single run with output_element_stride 19: block_width 10, block_height 6,
// input_stride 10, output_stride 6, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_6_bw_10_oes_19) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
931
// Single run with input_element_stride 25 and output_element_stride 21:
// block_width 46, block_height 14, input_stride 51, output_stride 20,
// element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_FLOAT_8, bh_14_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_float);
}
944
// Single run of the 2x2 scalar-int kernel: block_width 2, block_height 2,
// input_stride 4, output_stride 4, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
955
// Sweeps block_height over [1, 4] and block_width over [1, 4] (both
// inclusive); input_stride is 3x the width and output_stride 7x the height.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
    }
  }
}
970
// Single run of the 2x2 scalar-int kernel: block_width 4, block_height 2,
// input_stride 4, output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
981
// block_height 2 and output_stride 4 with block_width == input_stride swept
// over [3, 4); the upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
  }
}
994
// block_height 4 and output_stride 4 with block_width == input_stride swept
// over [3, 4); the upper bound is exclusive, so only width 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
  }
}
1007
// Single run of the 2x2 scalar-int kernel: block_width 2, block_height 4,
// input_stride 2, output_stride 10, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_4_bw_2) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
1018
// block_width 5 and input_stride 19 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
// NOTE(review): the test name says bw_2 but block_width is 5; kept as generated.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
  }
}
1031
// block_width 4 and input_stride 4 with block_height == output_stride swept
// over [3, 4); the upper bound is exclusive, so only height 3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester tester{};
    tester.input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1);
    tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
  }
}
1044
// block_height == output_stride and block_width == input_stride are each
// swept over [3, 4); both bounds are exclusive, so only 3x3 is exercised.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester tester{};
      tester.input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1);
      tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
    }
  }
}
1059
// Single run with input_stride 4 larger than block_width 2: block_height 2,
// output_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
1070
// Single run with output_stride 4 larger than block_height 2: block_width 2,
// input_stride 2, element_size 8.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester tester{};
  tester.input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1);
  tester.Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
1081
// 2x2 block where both input_stride and output_stride are 4.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
1092
// Block of width 38 and height 34 with input_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_34_bw_38_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
1104
// Block of width 10 and height 6 with output_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_6_bw_10_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
1116
// Block of width 46 and height 14 with input_stride 51, output_stride 20,
// input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__2X2_SCALAR_INT_8, bh_14_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_scalar_int);
}
1129
// Single run of the 4x1 scalar float kernel: block_width 1, block_height 4,
// input_stride 2, output_stride 8.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1140
// Sweeps every block_height in [1, 8] against every block_width in [1, 2];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
    }
  }
}
1155
// Single run with block_width 2 and block_height 4; strides equal the block
// dimensions (input_stride 2, output_stride 4).
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1166
// Runs the 4x1 scalar float kernel with block_height 4 over the block widths
// named by the test (2 through 2); input_stride equals the width and
// output_stride is 8.
//
// The loop used to be `for (i = 2; i < 2; ++i)`: an empty range, so the
// kernel was never invoked and the test passed vacuously. The upper bound is
// now inclusive so that width 2 is actually exercised.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in the generator template as well - confirm.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
  }
}
1179
// Runs the 4x1 scalar float kernel with block_height 8 over the block widths
// named by the test (2 through 2); input_stride equals the width and
// output_stride is 8.
//
// The loop used to be `for (i = 2; i < 2; ++i)`: an empty range, so the
// kernel was never invoked and the test passed vacuously. The upper bound is
// now inclusive so that width 2 is actually exercised.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in the generator template as well - confirm.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_8_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
  }
}
1192
// Single run with block_width 1 and block_height 8; input_stride 1,
// output_stride 16.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_8_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(16)
      .block_width(1)
      .block_height(8)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1203
// Sweeps block_height over [5, 8) (5, 6, 7), using the height as the output
// stride as well.
// Note: block_width is 4 and input_stride is 18 here, despite the bw_1 in the
// test name.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
  }
}
1216
// Sweeps block_height over [5, 8) (5, 6, 7) with block_width and input_stride
// fixed at 2; the height doubles as the output stride.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
  }
}
1229
// Sweeps block_height over [5, 8) (5, 6, 7) against the block widths named by
// the test (2 through 2); strides equal the corresponding block dimension.
//
// The inner loop used to be `for (j = 2; j < 2; ++j)`: an empty range, so the
// kernel was never invoked and the test passed vacuously. The upper bound is
// now inclusive so that width 2 is actually exercised.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in the generator template as well - confirm.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
    }
  }
}
1244
// 4-high, 1-wide block with input_stride 2 (larger than block_width 1) and
// output_stride 4.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1255
// 4-high, 1-wide block with input_stride 1 and output_stride 8 (larger than
// block_height 4).
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1266
// 4-high, 1-wide block with input_stride 2 and output_stride 8.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1277
// Block of width 19 and height 68 with input_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_68_bw_19_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(68)
      .block_width(19)
      .block_height(68)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1289
// Block of width 5 and height 12 with output_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_12_bw_5_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(5)
      .output_stride(12)
      .block_width(5)
      .block_height(12)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1301
// Block of width 23 and height 28 with input_stride 28, output_stride 34,
// input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__4X1_SCALAR_FLOAT_8, bh_28_bw_23_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(28)
      .output_stride(34)
      .block_width(23)
      .block_height(28)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_float);
}
1314
// Single run of the 4x1 scalar int kernel: block_width 1, block_height 4,
// input_stride 2, output_stride 8.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1325
// Sweeps every block_height in [1, 8] against every block_width in [1, 2];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
    }
  }
}
1340
// Single run with block_width 2 and block_height 4; strides equal the block
// dimensions (input_stride 2, output_stride 4).
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1351
// Runs the 4x1 scalar int kernel with block_height 4 over the block widths
// named by the test (2 through 2); input_stride equals the width and
// output_stride is 8.
//
// The loop used to be `for (i = 2; i < 2; ++i)`: an empty range, so the
// kernel was never invoked and the test passed vacuously. The upper bound is
// now inclusive so that width 2 is actually exercised.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in the generator template as well - confirm.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
  }
}
1364
// Runs the 4x1 scalar int kernel with block_height 8 over the block widths
// named by the test (2 through 2); input_stride equals the width and
// output_stride is 8.
//
// The loop used to be `for (i = 2; i < 2; ++i)`: an empty range, so the
// kernel was never invoked and the test passed vacuously. The upper bound is
// now inclusive so that width 2 is actually exercised.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in the generator template as well - confirm.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_8_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
  }
}
1377
// Single run with block_width 1 and block_height 8; input_stride 1,
// output_stride 16.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_8_bw_1) {
  TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(16)
      .block_width(1)
      .block_height(8)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1388
// Sweeps block_height over [5, 8) (5, 6, 7), using the height as the output
// stride as well.
// Note: block_width is 4 and input_stride is 18 here, despite the bw_1 in the
// test name.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(18)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
  }
}
1401
// Sweeps block_height over [5, 8) (5, 6, 7) with block_width and input_stride
// fixed at 2; the height doubles as the output stride.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(2)
        .output_stride(height)
        .block_width(2)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
  }
}
1414
// Sweeps block_height over [5, 8) (5, 6, 7) against the block widths named by
// the test (2 through 2); strides equal the corresponding block dimension.
//
// The inner loop used to be `for (j = 2; j < 2; ++j)`: an empty range, so the
// kernel was never invoked and the test passed vacuously. The upper bound is
// now inclusive so that width 2 is actually exercised.
// NOTE(review): this file is marked auto-generated; the same change presumably
// belongs in the generator template as well - confirm.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
    }
  }
}
1429
// 4-high, 1-wide block with input_stride 2 (larger than block_width 1) and
// output_stride 4.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1440
// 4-high, 1-wide block with input_stride 1 and output_stride 8 (larger than
// block_height 4).
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1451
// 4-high, 1-wide block with input_stride 2 and output_stride 8.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(1)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1462
// Block of width 19 and height 68 with input_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_68_bw_19_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(68)
      .block_width(19)
      .block_height(68)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1474
// Block of width 5 and height 12 with output_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_12_bw_5_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(5)
      .output_stride(12)
      .block_width(5)
      .block_height(12)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1486
// Block of width 23 and height 28 with input_stride 28, output_stride 34,
// input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__4X1_SCALAR_INT_8, bh_28_bw_23_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(28)
      .output_stride(34)
      .block_width(23)
      .block_height(28)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x1_scalar_int);
}
1499
// Single run of the 4x2 scalar float kernel: block_width 2, block_height 4,
// input_stride 4, output_stride 8.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1510
// Sweeps every block_height in [1, 8] against every block_width in [1, 4];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
    }
  }
}
1525
// Single run on a 4x4 block; both strides are 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1536
// Sweeps block_width over [3, 4) (i.e. only 3) with block_height 4 and
// output_stride 8; the width doubles as the input stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
  }
}
1549
// Sweeps block_width over [3, 4) (i.e. only 3) with block_height 8 and
// output_stride 8; the width doubles as the input stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
  }
}
1562
// Single run with block_width 2 and block_height 8; input_stride 2,
// output_stride 16.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_8_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(16)
      .block_width(2)
      .block_height(8)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1573
// Sweeps block_height over [5, 8) (5, 6, 7), using the height as the output
// stride as well.
// Note: block_width is 5 and input_stride is 19 here, despite the bw_2 in the
// test name.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
  }
}
1586
// Sweeps block_height over [5, 8) (5, 6, 7) with block_width and input_stride
// fixed at 4; the height doubles as the output stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
  }
}
1599
// Sweeps block_height over [5, 8) against block_width over [3, 4) (only 3);
// strides equal the corresponding block dimension.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
    }
  }
}
1614
// 4-high, 2-wide block with input_stride 4 (larger than block_width 2) and
// output_stride 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1625
// 4-high, 2-wide block with input_stride 2 and output_stride 8 (larger than
// block_height 4).
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1636
// 4-high, 2-wide block with input_stride 4 and output_stride 8.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1647
// Block of width 38 and height 68 with input_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_68_bw_38_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(68)
      .block_width(38)
      .block_height(68)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1659
// Block of width 10 and height 12 with output_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_12_bw_10_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(12)
      .block_width(10)
      .block_height(12)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1671
// Block of width 46 and height 28 with input_stride 51, output_stride 34,
// input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__4X2_SCALAR_FLOAT_8, bh_28_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(34)
      .block_width(46)
      .block_height(28)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_float);
}
1684
// Single run of the 4x2 scalar int kernel: block_width 2, block_height 4,
// input_stride 4, output_stride 8.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1695
// Sweeps every block_height in [1, 8] against every block_width in [1, 4];
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1710
// Single run on a 4x4 block; both strides are 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(4)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1721
// Sweeps block_width over [3, 4) (i.e. only 3) with block_height 4 and
// output_stride 8; the width doubles as the input stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
  }
}
1734
// Sweeps block_width over [3, 4) (i.e. only 3) with block_height 8 and
// output_stride 8; the width doubles as the input stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(8)
        .block_width(width)
        .block_height(8)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
  }
}
1747
// Single run with block_width 2 and block_height 8; input_stride 2,
// output_stride 16.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_8_bw_2) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(16)
      .block_width(2)
      .block_height(8)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1758
// Sweeps block_height over [5, 8) (5, 6, 7), using the height as the output
// stride as well.
// Note: block_width is 5 and input_stride is 19 here, despite the bw_2 in the
// test name.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
  }
}
1771
// Sweeps block_height over [5, 8) (5, 6, 7) with block_width and input_stride
// fixed at 4; the height doubles as the output stride.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
  }
}
1784
// Sweeps block_height over [5, 8) against block_width over [3, 4) (only 3);
// strides equal the corresponding block dimension.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1799
// 4-high, 2-wide block with input_stride 4 (larger than block_width 2) and
// output_stride 4.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1810
// 4-high, 2-wide block with input_stride 2 and output_stride 8 (larger than
// block_height 4).
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1821
// 4-high, 2-wide block with input_stride 4 and output_stride 8.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(8)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1832
// Block of width 38 and height 68 with input_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_68_bw_38_ies_19) {
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(68)
      .block_width(38)
      .block_height(68)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1844
// Block of width 10 and height 12 with output_element_stride set to 19;
// strides match the block dimensions.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_12_bw_10_oes_19) {
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(12)
      .block_width(10)
      .block_height(12)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1856
// Block of width 46 and height 28 with input_stride 51, output_stride 34,
// input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__4X2_SCALAR_INT_8, bh_28_bw_46_ies_25_oes_21) {
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(34)
      .block_width(46)
      .block_height(28)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__4x2_scalar_int);
}
1869
1870 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x2 multi_mov SSE2 kernel on a 2x2 block with both
// strides set to 4. Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
1882
// Sweeps every block_height in [1, 4] against every block_width in [1, 4];
// input_stride is 3x the width and output_stride is 7x the height.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
    }
  }
}
1898
// Single run with block_width 4 and block_height 2; strides equal the block
// dimensions (input_stride 4, output_stride 2).
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
1910
// Sweeps block_width over [3, 4) (i.e. only 3) with block_height 2 and
// output_stride 4; the width doubles as the input stride.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1924
// Sweeps block_width over [3, 4) (i.e. only 3) with block_height 4 and
// output_stride 4; the width doubles as the input stride.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1938
// Single run with block_width 2 and block_height 4; input_stride 2,
// output_stride 10. Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(2)
      .block_height(4)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
1950
// Sweeps block_height over [3, 4) (i.e. only 3), using the height as the
// output stride as well. Guarded by TEST_REQUIRES_X86_SSE2.
// Note: block_width is 5 and input_stride is 19 here, despite the bw_2 in the
// test name.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1964
// Sweeps block_height over [3, 4) (i.e. only 3) with block_width and
// input_stride fixed at 4; the height doubles as the output stride.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
  }
}
1978
// Sweeps block_height over [3, 4) and block_width over [3, 4) (only 3x3 in
// practice); strides equal the corresponding block dimension.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
    }
  }
}
1994
// 2x2 block with input_stride 4 (larger than block_width 2) and
// output_stride 2. Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2006
// 2x2 block with input_stride 2 and output_stride 4 (larger than
// block_height 2). Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2018
// 2x2 block where both input_stride and output_stride are 4.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2030
// Block of width 38 and height 34 with input_element_stride set to 19;
// strides match the block dimensions. Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(38)
      .block_height(34)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2043
// Block of width 10 and height 6 with output_element_stride set to 19;
// strides match the block dimensions. Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(10)
      .block_height(6)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2056
// Block of width 46 and height 14 with input_stride 51, output_stride 20,
// input_element_stride 25 and output_element_stride 21.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MOV_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(46)
      .block_height(14)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2);
}
2070 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2071
2072
2073 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x2 multi_multi SSE2 kernel on a 2x2 block with both
// strides set to 4. Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(2)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2085
// Sweeps every block_height in [1, 4] against every block_width in [1, 4];
// input_stride is 3x the width and output_stride is 7x the height.
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
    }
  }
}
2101
// Single run with block_width 4 and block_height 2; strides equal the block
// dimensions (input_stride 4, output_stride 2).
// Guarded by TEST_REQUIRES_X86_SSE2.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(4)
      .block_height(2)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2113
// block_height 2 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2127
// block_height 4 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2141
// block_height 4 with block_width 2; input_stride is 2 and output_stride is 10.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2153
// block_height taken from the half-open range [3, 4), so the loop body
// executes once, for height 3. output_stride tracks the height.
// NOTE(review): the test name says bw_2, but block_width is 5 (with
// input_stride 19) -- confirm this is what the generator spec intends.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2167
// block_width 4 (input_stride 4) with block_height taken from the half-open
// range [3, 4), so the loop body executes once, for height 3. output_stride
// tracks the height.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
  }
}
2181
// Both block_height and block_width come from the half-open range [3, 4), so
// the only combination exercised is 3 x 3. input_stride equals the width and
// output_stride equals the height.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
    }
  }
}
2197
// 2 x 2 block where input_stride (4) is larger than block_width while
// output_stride (2) equals block_height.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2209
// 2 x 2 block where output_stride (4) is larger than block_height while
// input_stride (2) equals block_width.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2221
// 2 x 2 block with input_stride and output_stride both 4. These are the same
// parameter values as the bh_2_bw_2 case of this suite.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2233
// block_height 34 by block_width 38, with input_stride 38 and output_stride 34,
// run with an input_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 34;
  const size_t width = 38;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2246
// block_height 6 by block_width 10, with input_stride 10 and output_stride 6,
// run with an output_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 6;
  const size_t width = 10;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2259
// block_height 14 by block_width 46, with input_stride 51 and output_stride 20,
// run with input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__2X2_MULTI_MULTI_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 14;
  const size_t width = 46;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2);
}
2273 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2274
2275
2276 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of block_height 2 and block_width 2, with input_stride and
// output_stride both set to 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2288
// Sweeps block_height and block_width independently over 1..4 (inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; ++height) {
    const size_t out_stride = height * 7;
    for (size_t width = 1; width <= 4; ++width) {
      const size_t in_stride = width * 3;
      TransposeMicrokernelTester()
          .input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
    }
  }
}
2304
// block_height 2 with block_width 4; input_stride is 4 and output_stride is 2.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2316
// block_height 2 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2330
// block_height 4 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2344
// block_height 4 with block_width 2; input_stride is 2 and output_stride is 10.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2356
// block_height taken from the half-open range [3, 4), so the loop body
// executes once, for height 3. output_stride tracks the height.
// NOTE(review): the test name says bw_2, but block_width is 5 (with
// input_stride 19) -- confirm this is what the generator spec intends.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2370
// block_width 4 (input_stride 4) with block_height taken from the half-open
// range [3, 4), so the loop body executes once, for height 3. output_stride
// tracks the height.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
  }
}
2384
// Both block_height and block_width come from the half-open range [3, 4), so
// the only combination exercised is 3 x 3. input_stride equals the width and
// output_stride equals the height.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
    }
  }
}
2400
// 2 x 2 block where input_stride (4) is larger than block_width while
// output_stride (2) equals block_height.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2412
// 2 x 2 block where output_stride (4) is larger than block_height while
// input_stride (2) equals block_width.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2424
// 2 x 2 block with input_stride and output_stride both 4. These are the same
// parameter values as the bh_2_bw_2 case of this suite.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2436
// block_height 34 by block_width 38, with input_stride 38 and output_stride 34,
// run with an input_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 34;
  const size_t width = 38;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2449
// block_height 6 by block_width 10, with input_stride 10 and output_stride 6,
// run with an output_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 6;
  const size_t width = 10;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2462
// block_height 14 by block_width 46, with input_stride 51 and output_stride 20,
// run with input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__2X2_MULTI_SWITCH_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 14;
  const size_t width = 46;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2);
}
2476 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2477
2478
2479 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of block_height 2 and block_width 2, with input_stride and
// output_stride both set to 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2491
// Sweeps block_height and block_width independently over 1..4 (inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; ++height) {
    const size_t out_stride = height * 7;
    for (size_t width = 1; width <= 4; ++width) {
      const size_t in_stride = width * 3;
      TransposeMicrokernelTester()
          .input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
    }
  }
}
2507
// block_height 2 with block_width 4; input_stride is 4 and output_stride is 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2519
// block_height 2 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2533
// block_height 4 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2547
// block_height 4 with block_width 2; input_stride is 2 and output_stride is 10.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2559
// block_height taken from the half-open range [3, 4), so the loop body
// executes once, for height 3. output_stride tracks the height.
// NOTE(review): the test name says bw_2, but block_width is 5 (with
// input_stride 19) -- confirm this is what the generator spec intends.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2573
// block_width 4 (input_stride 4) with block_height taken from the half-open
// range [3, 4), so the loop body executes once, for height 3. output_stride
// tracks the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
  }
}
2587
// Both block_height and block_width come from the half-open range [3, 4), so
// the only combination exercised is 3 x 3. input_stride equals the width and
// output_stride equals the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
    }
  }
}
2603
// 2 x 2 block where input_stride (4) is larger than block_width while
// output_stride (2) equals block_height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2615
// 2 x 2 block where output_stride (4) is larger than block_height while
// input_stride (2) equals block_width.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2627
// 2 x 2 block with input_stride and output_stride both 4. These are the same
// parameter values as the bh_2_bw_2 case of this suite.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2639
// block_height 34 by block_width 38, with input_stride 38 and output_stride 34,
// run with an input_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 34;
  const size_t width = 38;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2652
// block_height 6 by block_width 10, with input_stride 10 and output_stride 6,
// run with an output_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 6;
  const size_t width = 10;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2665
// block_height 14 by block_width 46, with input_stride 51 and output_stride 20,
// run with input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__2X2_REUSE_MOV_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 14;
  const size_t width = 46;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2);
}
2679 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2680
2681
2682 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of block_height 2 and block_width 2, with input_stride and
// output_stride both set to 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2694
// Sweeps block_height and block_width independently over 1..4 (inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; ++height) {
    const size_t out_stride = height * 7;
    for (size_t width = 1; width <= 4; ++width) {
      const size_t in_stride = width * 3;
      TransposeMicrokernelTester()
          .input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
    }
  }
}
2710
// block_height 2 with block_width 4; input_stride is 4 and output_stride is 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2722
// block_height 2 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2736
// block_height 4 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2750
// block_height 4 with block_width 2; input_stride is 2 and output_stride is 10.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2762
// block_height taken from the half-open range [3, 4), so the loop body
// executes once, for height 3. output_stride tracks the height.
// NOTE(review): the test name says bw_2, but block_width is 5 (with
// input_stride 19) -- confirm this is what the generator spec intends.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2776
// block_width 4 (input_stride 4) with block_height taken from the half-open
// range [3, 4), so the loop body executes once, for height 3. output_stride
// tracks the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
  }
}
2790
// Both block_height and block_width come from the half-open range [3, 4), so
// the only combination exercised is 3 x 3. input_stride equals the width and
// output_stride equals the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
    }
  }
}
2806
// 2 x 2 block where input_stride (4) is larger than block_width while
// output_stride (2) equals block_height.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2818
// 2 x 2 block where output_stride (4) is larger than block_height while
// input_stride (2) equals block_width.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2830
// 2 x 2 block with input_stride and output_stride both 4. These are the same
// parameter values as the bh_2_bw_2 case of this suite.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2842
// block_height 34 by block_width 38, with input_stride 38 and output_stride 34,
// run with an input_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 34;
  const size_t width = 38;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2855
// block_height 6 by block_width 10, with input_stride 10 and output_stride 6,
// run with an output_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 6;
  const size_t width = 10;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2868
// block_height 14 by block_width 46, with input_stride 51 and output_stride 20,
// run with input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__2X2_REUSE_MULTI_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 14;
  const size_t width = 46;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2);
}
2882 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2883
2884
2885 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single block of block_height 2 and block_width 2, with input_stride and
// output_stride both set to 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
2897
// Sweeps block_height and block_width independently over 1..4 (inclusive).
// input_stride is 3x the width and output_stride is 7x the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; ++height) {
    const size_t out_stride = height * 7;
    for (size_t width = 1; width <= 4; ++width) {
      const size_t in_stride = width * 3;
      TransposeMicrokernelTester()
          .input_stride(in_stride)
          .output_stride(out_stride)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
    }
  }
}
2913
// block_height 2 with block_width 4; input_stride is 4 and output_stride is 2.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
2925
// block_height 2 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(2)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2939
// block_height 4 with block_width taken from the half-open range [3, 4), so
// the loop body executes once, for width 3. input_stride tracks the width;
// output_stride is fixed at 4.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(4)
        .block_width(width)
        .block_height(4)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2953
// block_height 4 with block_width 2; input_stride is 2 and output_stride is 10.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(10)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
2965
// block_height taken from the half-open range [3, 4), so the loop body
// executes once, for height 3. output_stride tracks the height.
// NOTE(review): the test name says bw_2, but block_width is 5 (with
// input_stride 19) -- confirm this is what the generator spec intends.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(19)
        .output_stride(height)
        .block_width(5)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2979
// block_width 4 (input_stride 4) with block_height taken from the half-open
// range [3, 4), so the loop body executes once, for height 3. output_stride
// tracks the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4)
        .output_stride(height)
        .block_width(4)
        .block_height(height)
        .element_size(8)
        .iterations(1)
        .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
  }
}
2993
// Both block_height and block_width come from the half-open range [3, 4), so
// the only combination exercised is 3 x 3. input_stride equals the width and
// output_stride equals the height.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(8)
          .iterations(1)
          .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
    }
  }
}
3009
// 2 x 2 block where input_stride (4) is larger than block_width while
// output_stride (2) equals block_height.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(2)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3021
// 2 x 2 block where output_stride (4) is larger than block_height while
// input_stride (2) equals block_width.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3033
// 2 x 2 block with input_stride and output_stride both 4. These are the same
// parameter values as the bh_2_bw_2 case of this suite.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(4)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3045
// block_height 34 by block_width 38, with input_stride 38 and output_stride 34,
// run with an input_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_34_bw_38_ies_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 34;
  const size_t width = 38;
  TransposeMicrokernelTester()
      .input_stride(38)
      .output_stride(34)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3058
// block_height 6 by block_width 10, with input_stride 10 and output_stride 6,
// run with an output_element_stride of 19.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_6_bw_10_oes_19) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 6;
  const size_t width = 10;
  TransposeMicrokernelTester()
      .input_stride(10)
      .output_stride(6)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .output_element_stride(19)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3071
// block_height 14 by block_width 46, with input_stride 51 and output_stride 20,
// run with input_element_stride 25 and output_element_stride 21.
TEST(X64_TRANSPOSEC__2X2_REUSE_SWITCH_SSE2_8, bh_14_bw_46_ies_25_oes_21) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 14;
  const size_t width = 46;
  TransposeMicrokernelTester()
      .input_stride(51)
      .output_stride(20)
      .block_width(width)
      .block_height(height)
      .element_size(8)
      .input_element_stride(25)
      .output_element_stride(21)
      .iterations(1)
      .Test(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2);
}
3085 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3086