1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/x8-transpose.yaml
8 // Generator: tools/generate-transpose-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18
19
// Block height 1, block width 2; input stride 4, output stride 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2) {
  const size_t height = 1;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
30
// Sweeps block heights 1..2 and block widths 1..4 (inclusive); the input
// stride is 3x the width and the output stride is 7x the height.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
    }
  }
}
45
// Block height 1, block width 4; input stride 4, output stride 1.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_4) {
  const size_t height = 1;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(1)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
56
// Block widths in [3, 4) with block height 1; the input stride equals the
// width and the output stride is 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(1)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
69
// Block widths in [3, 4) with block height 2; the input stride equals the
// width and the output stride is 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
82
// Block height 2, block width 2; input stride 2, output stride 7.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_bw_2) {
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(7)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
93
// Block heights 2..2 (inclusive) with block width 5 and input stride 19; the
// output stride equals the height.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(height)
      .block_width(5)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
106
// Block heights 2..2 (inclusive) with block width 4 and input stride 4; the
// output stride equals the height.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
  }
}
119
// Block heights 2..2 (inclusive) crossed with block widths in [3, 4); the
// input stride equals the width and the output stride equals the height.
// The height bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
    }
  }
}
134
// Block height 1, block width 2; input stride 4, output stride 1.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2_is_4) {
  const size_t height = 1;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(1)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
145
// Block height 1, block width 2; input stride 2, output stride 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2_os_2) {
  const size_t height = 1;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
156
// Block height 1, block width 2; input stride 4, output stride 2.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_1_bw_2_is_4_os_2) {
  const size_t height = 1;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
167
// Block height 17, block width 38, with strides equal to the block
// dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_17_bw_38_ies_12) {
  const size_t height = 17;
  const size_t width = 38;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
179
// Block height 3, block width 10, with strides equal to the block
// dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_3_bw_10_oes_12) {
  const size_t height = 3;
  const size_t width = 10;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
191
// Block height 7, block width 46; input stride 51, output stride 13, input
// element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__1X2_SCALAR_INT_1, bh_7_bw_46_ies_18_oes_14) {
  const size_t height = 7;
  const size_t width = 46;
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(13)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(18)
    .output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x2_scalar_int);
}
204
// Block height 1, block width 4; input stride 8, output stride 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4) {
  const size_t height = 1;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
215
// Sweeps block heights 1..2 and block widths 1..8 (inclusive); the input
// stride is 3x the width and the output stride is 7x the height.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_2_bw_1_8) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
    }
  }
}
230
// Block height 1, block width 8; input stride 8, output stride 1.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_8) {
  const size_t height = 1;
  const size_t width = 8;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(1)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
241
// Block widths in [5, 8) with block height 1; the input stride equals the
// width and the output stride is 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(1)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
254
// Block widths in [5, 8) with block height 2; the input stride equals the
// width and the output stride is 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
267
// Block height 2, block width 4; input stride 4, output stride 7.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_bw_4) {
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(7)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
278
// Block heights 2..2 (inclusive) with block width 7 and input stride 21; the
// output stride equals the height.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
291
// Block heights 2..2 (inclusive) with block width 8 and input stride 8; the
// output stride equals the height.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_2_bw_8) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
  }
}
304
// Block heights 2..2 (inclusive) crossed with block widths in [5, 8); the
// input stride equals the width and the output stride equals the height.
// The height bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_2_2_bw_5_8) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
    }
  }
}
319
// Block height 1, block width 4; input stride 8, output stride 1.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4_is_8) {
  const size_t height = 1;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(1)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
330
// Block height 1, block width 4; input stride 4, output stride 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4_os_2) {
  const size_t height = 1;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
341
// Block height 1, block width 4; input stride 8, output stride 2.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_1_bw_4_is_8_os_2) {
  const size_t height = 1;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
352
// Block height 17, block width 76, with strides equal to the block
// dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_17_bw_76_ies_12) {
  const size_t height = 17;
  const size_t width = 76;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
364
// Block height 3, block width 20, with strides equal to the block
// dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_3_bw_20_oes_12) {
  const size_t height = 3;
  const size_t width = 20;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
376
// Block height 7, block width 92; input stride 97, output stride 13, input
// element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__1X4_SCALAR_INT_1, bh_7_bw_92_ies_18_oes_14) {
  const size_t height = 7;
  const size_t width = 92;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(13)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(18)
    .output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__1x4_scalar_int);
}
389
// Block height 2, block width 1; input stride 2, output stride 4.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1) {
  const size_t height = 2;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
400
// Sweeps block heights 1..4 and block widths 1..2 (inclusive); the input
// stride is 3x the width and the output stride is 7x the height.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
    }
  }
}
415
// Block height 2, block width 2; input stride 2, output stride 2.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_2) {
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
426
// Block widths 2..2 (inclusive) with block height 2; the input stride equals
// the width and the output stride is 4.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(2)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
439
// Block widths 2..2 (inclusive) with block height 4; the input stride equals
// the width and the output stride is 4.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
452
// Block height 4, block width 1; input stride 1, output stride 10.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_4_bw_1) {
  const size_t height = 4;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(10)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
463
// Block heights in [3, 4) with block width 4 and input stride 18; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(18)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
476
// Block heights in [3, 4) with block width 2 and input stride 2; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
  }
}
489
// Block heights in [3, 4) crossed with block widths 2..2 (inclusive); the
// input stride equals the width and the output stride equals the height.
// The width bound is inclusive so that the inner loop body actually executes:
// a strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
    }
  }
}
504
// Block height 2, block width 1; input stride 2, output stride 2.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1_is_2) {
  const size_t height = 2;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
515
// Block height 2, block width 1; input stride 1, output stride 4.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1_os_4) {
  const size_t height = 2;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
526
// Block height 2, block width 1; input stride 2, output stride 4.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_2_bw_1_is_2_os_4) {
  const size_t height = 2;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
537
// Block height 34, block width 19, with strides equal to the block
// dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_34_bw_19_ies_12) {
  const size_t height = 34;
  const size_t width = 19;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
549
// Block height 6, block width 5, with strides equal to the block dimensions
// and an output element stride of 12.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_6_bw_5_oes_12) {
  const size_t height = 6;
  const size_t width = 5;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
561
// Block height 14, block width 23; input stride 28, output stride 20, input
// element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__2X1_SCALAR_INT_1, bh_14_bw_23_ies_18_oes_14) {
  const size_t height = 14;
  const size_t width = 23;
  TransposeMicrokernelTester()
    .input_stride(28)
    .output_stride(20)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(18)
    .output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x1_scalar_int);
}
574
// Block height 2, block width 2; input stride 4, output stride 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2) {
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
585
// Sweeps block heights 1..4 and block widths 1..4 (inclusive); the input
// stride is 3x the width and the output stride is 7x the height.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
    }
  }
}
600
// Block height 2, block width 4; input stride 4, output stride 2.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_4) {
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
611
// Block widths in [3, 4) with block height 2; the input stride equals the
// width and the output stride is 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(2)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
624
// Block widths in [3, 4) with block height 4; the input stride equals the
// width and the output stride is 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
637
// Block height 4, block width 2; input stride 2, output stride 10.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_4_bw_2) {
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(10)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
648
// Block heights in [3, 4) with block width 5 and input stride 19; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(19)
      .output_stride(height)
      .block_width(5)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
661
// Block heights in [3, 4) with block width 4 and input stride 4; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
  }
}
674
// Block heights in [3, 4) crossed with block widths in [3, 4); the input
// stride equals the width and the output stride equals the height.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
    }
  }
}
689
// Block height 2, block width 2; input stride 4, output stride 2.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2_is_4) {
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
700
// Block height 2, block width 2; input stride 2, output stride 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2_os_4) {
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
711
// Block height 2, block width 2; input stride 4, output stride 4.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_2_bw_2_is_4_os_4) {
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
722
// Block height 34, block width 38, with strides equal to the block
// dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_34_bw_38_ies_12) {
  const size_t height = 34;
  const size_t width = 38;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
734
// Block height 6, block width 10, with strides equal to the block
// dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_6_bw_10_oes_12) {
  const size_t height = 6;
  const size_t width = 10;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
746
// Block height 14, block width 46; input stride 51, output stride 20, input
// element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__2X2_SCALAR_INT_1, bh_14_bw_46_ies_18_oes_14) {
  const size_t height = 14;
  const size_t width = 46;
  TransposeMicrokernelTester()
    .input_stride(51)
    .output_stride(20)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(18)
    .output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x2_scalar_int);
}
759
// Block height 2, block width 4; input stride 8, output stride 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4) {
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
770
// Sweeps block heights 1..4 and block widths 1..8 (inclusive); the input
// stride is 3x the width and the output stride is 7x the height.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_1_4_bw_1_8) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
    }
  }
}
785
// Block height 2, block width 8; input stride 8, output stride 2.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_8) {
  const size_t height = 2;
  const size_t width = 8;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
796
// Block widths in [5, 8) with block height 2; the input stride equals the
// width and the output stride is 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(2)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
809
// Block widths in [5, 8) with block height 4; the input stride equals the
// width and the output stride is 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
822
// Block height 4, block width 4; input stride 4, output stride 10.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_4_bw_4) {
  const size_t height = 4;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(10)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
833
// Block heights in [3, 4) with block width 7 and input stride 21; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(21)
      .output_stride(height)
      .block_width(7)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
846
// Block heights in [3, 4) with block width 8 and input stride 8; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_3_4_bw_8) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
  }
}
859
// Block heights in [3, 4) crossed with block widths in [5, 8); the input
// stride equals the width and the output stride equals the height.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_3_4_bw_5_8) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
    }
  }
}
874
// Block height 2, block width 4; input stride 8, output stride 2.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4_is_8) {
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(2)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
885
// Block height 2, block width 4; input stride 4, output stride 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4_os_4) {
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
896
// Block height 2, block width 4; input stride 8, output stride 4.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_2_bw_4_is_8_os_4) {
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
    .input_stride(8)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
907
// Block height 34, block width 76, with strides equal to the block
// dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_34_bw_76_ies_12) {
  const size_t height = 34;
  const size_t width = 76;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
919
// Block height 6, block width 20, with strides equal to the block
// dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_6_bw_20_oes_12) {
  const size_t height = 6;
  const size_t width = 20;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .output_element_stride(12)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
931
// Block height 14, block width 92; input stride 97, output stride 20, input
// element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__2X4_SCALAR_INT_1, bh_14_bw_92_ies_18_oes_14) {
  const size_t height = 14;
  const size_t width = 92;
  TransposeMicrokernelTester()
    .input_stride(97)
    .output_stride(20)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .input_element_stride(18)
    .output_element_stride(14)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__2x4_scalar_int);
}
944
// Block height 4, block width 1; input stride 2, output stride 8.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1) {
  const size_t height = 4;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(8)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
955
// Sweeps block heights 1..8 and block widths 1..2 (inclusive); the input
// stride is 3x the width and the output stride is 7x the height.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width * 3)
        .output_stride(height * 7)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
    }
  }
}
970
// Block height 4, block width 2; input stride 2, output stride 4.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_2) {
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
981
// Block widths 2..2 (inclusive) with block height 4; the input stride equals
// the width and the output stride is 8.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(4)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
994
// Block widths 2..2 (inclusive) with block height 8; the input stride equals
// the width and the output stride is 8.
// The upper bound is inclusive so that the loop body actually executes: a
// strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_8_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
1007
// Block height 8, block width 1; input stride 1, output stride 16.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_8_bw_1) {
  const size_t height = 8;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(16)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1018
// Block heights in [5, 8) with block width 4 and input stride 18; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(18)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
1031
// Block heights in [5, 8) with block width 2 and input stride 2; the output
// stride equals the height.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
  }
}
1044
// Block heights in [5, 8) crossed with block widths 2..2 (inclusive); the
// input stride equals the width and the output stride equals the height.
// The width bound is inclusive so that the inner loop body actually executes:
// a strict `< 2` bound yields an empty range and the test would pass without
// ever invoking the microkernel.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
    }
  }
}
1059
// Block height 4, block width 1; input stride 2, output stride 4.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1_is_2) {
  const size_t height = 4;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1070
// Block height 4, block width 1; input stride 1, output stride 8.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1_os_8) {
  const size_t height = 4;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(1)
    .output_stride(8)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1081
// Block height 4, block width 1; input stride 2, output stride 8.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_4_bw_1_is_2_os_8) {
  const size_t height = 4;
  const size_t width = 1;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(8)
    .block_width(width)
    .block_height(height)
    .element_size(1)
    .iterations(1)
    .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1092
// 4x1 scalar-int kernel: 68-high, 19-wide block with strides equal to the
// block dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_68_bw_19_ies_12) {
  const size_t width = 19;
  const size_t height = 68;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1104
// 4x1 scalar-int kernel: 12-high, 5-wide block with strides equal to the
// block dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_12_bw_5_oes_12) {
  const size_t width = 5;
  const size_t height = 12;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1116
// 4x1 scalar-int kernel: 28-high, 23-wide block, input stride 28, output
// stride 34, input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__4X1_SCALAR_INT_1, bh_28_bw_23_ies_18_oes_14) {
  const size_t in_stride = 28;
  const size_t out_stride = 34;
  const size_t width = 23;
  const size_t height = 28;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x1_scalar_int);
}
1129
// 4x2 scalar-int kernel: one 4-high, 2-wide block with input stride 4 and
// output stride 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2) {
  const size_t in_stride = 4;
  const size_t out_stride = 8;
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1140
// 4x2 scalar-int kernel: every block height 1..8 crossed with every block
// width 1..4; input stride is 3x the width, output stride 7x the height.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1155
// 4x2 scalar-int kernel: one 4-high, 4-wide block with input and output
// strides of 4.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_4) {
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1166
// 4x2 scalar-int kernel: block height 4 with block widths in [3, 4), i.e.
// only width 3; input stride equals the width, output stride is 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_3_4) {
  const size_t out_stride = 8;
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1179
// 4x2 scalar-int kernel: block height 8 with block widths in [3, 4), i.e.
// only width 3; input stride equals the width, output stride is 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_8_bw_3_4) {
  const size_t out_stride = 8;
  const size_t height = 8;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1192
// 4x2 scalar-int kernel: one 8-high, 2-wide block with input stride 2 and
// output stride 16.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_8_bw_2) {
  const size_t in_stride = 2;
  const size_t out_stride = 16;
  const size_t width = 2;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1203
// 4x2 scalar-int kernel: block heights 5..7 with input stride 19; the output
// stride tracks the height. Note that the block width used is 5, not the 2
// that appears in the test name.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_5_8_bw_2) {
  const size_t in_stride = 19;
  const size_t width = 5;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1216
// 4x2 scalar-int kernel: block heights 5..7 at a fixed block width of 4;
// the output stride tracks the height.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_5_8_bw_4) {
  const size_t width = 4;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
  }
}
1229
// 4x2 scalar-int kernel: block heights 5..7 crossed with block widths in
// [3, 4), i.e. only width 3; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
    }
  }
}
1244
// 4x2 scalar-int kernel: one 4-high, 2-wide block with input stride 4 and
// output stride 4.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2_is_4) {
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1255
// 4x2 scalar-int kernel: one 4-high, 2-wide block with input stride 2 and
// output stride 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2_os_8) {
  const size_t in_stride = 2;
  const size_t out_stride = 8;
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1266
// 4x2 scalar-int kernel: one 4-high, 2-wide block with input stride 4 and
// output stride 8.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_4_bw_2_is_4_os_8) {
  const size_t in_stride = 4;
  const size_t out_stride = 8;
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1277
// 4x2 scalar-int kernel: 68-high, 38-wide block with strides equal to the
// block dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_68_bw_38_ies_12) {
  const size_t width = 38;
  const size_t height = 68;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1289
// 4x2 scalar-int kernel: 12-high, 10-wide block with strides equal to the
// block dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_12_bw_10_oes_12) {
  const size_t width = 10;
  const size_t height = 12;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1301
// 4x2 scalar-int kernel: 28-high, 46-wide block, input stride 51, output
// stride 34, input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__4X2_SCALAR_INT_1, bh_28_bw_46_ies_18_oes_14) {
  const size_t in_stride = 51;
  const size_t out_stride = 34;
  const size_t width = 46;
  const size_t height = 28;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x2_scalar_int);
}
1314
// 4x4 scalar-int kernel: one 4-high, 4-wide block with input and output
// strides of 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4) {
  const size_t in_stride = 8;
  const size_t out_stride = 8;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1325
// 4x4 scalar-int kernel: every block height 1..8 crossed with every block
// width 1..8; input stride is 3x the width, output stride 7x the height.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1340
// 4x4 scalar-int kernel: one 4-high, 8-wide block with input stride 8 and
// output stride 4.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_8) {
  const size_t in_stride = 8;
  const size_t out_stride = 4;
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1351
// 4x4 scalar-int kernel: block height 4 with block widths 5..7; input stride
// equals the width, output stride is 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_5_8) {
  const size_t out_stride = 8;
  const size_t height = 4;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1364
// 4x4 scalar-int kernel: block height 8 with block widths 5..7; input stride
// equals the width, output stride is 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_8_bw_5_8) {
  const size_t out_stride = 8;
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1377
// 4x4 scalar-int kernel: one 8-high, 4-wide block with input stride 4 and
// output stride 16.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_8_bw_4) {
  const size_t in_stride = 4;
  const size_t out_stride = 16;
  const size_t width = 4;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1388
// 4x4 scalar-int kernel: block heights 5..7 with input stride 21; the output
// stride tracks the height. Note that the block width used is 7, not the 4
// that appears in the test name.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_5_8_bw_4) {
  const size_t in_stride = 21;
  const size_t width = 7;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1401
// 4x4 scalar-int kernel: block heights 5..7 at a fixed block width of 8;
// the output stride tracks the height.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_5_8_bw_8) {
  const size_t width = 8;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
  }
}
1414
// 4x4 scalar-int kernel: block heights 5..7 crossed with block widths 5..7;
// strides equal the block dimensions.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
    }
  }
}
1429
// 4x4 scalar-int kernel: one 4-high, 4-wide block with input stride 8 and
// output stride 4.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8) {
  const size_t in_stride = 8;
  const size_t out_stride = 4;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1440
// 4x4 scalar-int kernel: one 4-high, 4-wide block with input stride 4 and
// output stride 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_os_8) {
  const size_t in_stride = 4;
  const size_t out_stride = 8;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1451
// 4x4 scalar-int kernel: one 4-high, 4-wide block with input stride 8 and
// output stride 8.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_4_bw_4_is_8_os_8) {
  const size_t in_stride = 8;
  const size_t out_stride = 8;
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1462
// 4x4 scalar-int kernel: 68-high, 76-wide block with strides equal to the
// block dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_68_bw_76_ies_12) {
  const size_t width = 76;
  const size_t height = 68;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1474
// 4x4 scalar-int kernel: 12-high, 20-wide block with strides equal to the
// block dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_12_bw_20_oes_12) {
  const size_t width = 20;
  const size_t height = 12;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1486
// 4x4 scalar-int kernel: 28-high, 92-wide block, input stride 97, output
// stride 34, input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__4X4_SCALAR_INT_1, bh_28_bw_92_ies_18_oes_14) {
  const size_t in_stride = 97;
  const size_t out_stride = 34;
  const size_t width = 92;
  const size_t height = 28;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__4x4_scalar_int);
}
1499
1500 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 16x16 reuse-mov SSE2 kernel: one 16-high, 16-wide block with input and
// output strides of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1512
// 16x16 reuse-mov SSE2 kernel: every block height 1..32 crossed with every
// block width 1..32; input stride is 3x the width, output stride 7x the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
    }
  }
}
1528
// 16x16 reuse-mov SSE2 kernel: one 16-high, 32-wide block with input stride
// 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 16;
  const size_t width = 32;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1540
// 16x16 reuse-mov SSE2 kernel: block height 16 with block widths 17..31;
// input stride equals the width, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t out_stride = 32;
  const size_t height = 16;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1554
// 16x16 reuse-mov SSE2 kernel: block height 32 with block widths 17..31;
// input stride equals the width, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t out_stride = 32;
  const size_t height = 32;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1568
// 16x16 reuse-mov SSE2 kernel: one 32-high, 16-wide block with input stride
// 16 and output stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 16;
  const size_t out_stride = 52;
  const size_t width = 16;
  const size_t height = 32;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1580
// 16x16 reuse-mov SSE2 kernel: block heights 17..31 with input stride 33; the
// output stride tracks the height. Note that the block width used is 19, not
// the 16 that appears in the test name.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_17_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 33;
  const size_t width = 19;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1594
// 16x16 reuse-mov SSE2 kernel: block heights 17..31 at a fixed block width of
// 32; the output stride tracks the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_17_32_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 32;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
  }
}
1608
// 16x16 reuse-mov SSE2 kernel: block heights 17..31 crossed with block widths
// 17..31; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
    }
  }
}
1624
// 16x16 reuse-mov SSE2 kernel: one 16-high, 16-wide block with input stride
// 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 16;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1636
// 16x16 reuse-mov SSE2 kernel: one 16-high, 16-wide block with input stride
// 16 and output stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 16;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1648
// 16x16 reuse-mov SSE2 kernel: one 16-high, 16-wide block with input stride
// 32 and output stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1660
// 16x16 reuse-mov SSE2 kernel: 272-high, 304-wide block with strides equal to
// the block dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 304;
  const size_t height = 272;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1673
// 16x16 reuse-mov SSE2 kernel: 48-high, 80-wide block with strides equal to
// the block dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 80;
  const size_t height = 48;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1686
// 16x16 reuse-mov SSE2 kernel: 112-high, 368-wide block, input stride 373,
// output stride 118, input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_SSE2_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 373;
  const size_t out_stride = 118;
  const size_t width = 368;
  const size_t height = 112;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2);
}
1700 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1701
1702
1703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 16x16 reuse-switch SSE2 kernel: one 16-high, 16-wide block with input and
// output strides of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1715
// 16x16 reuse-switch SSE2 kernel: every block height 1..32 crossed with every
// block width 1..32; input stride is 3x the width, output stride 7x the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
    }
  }
}
1731
// 16x16 reuse-switch SSE2 kernel: one 16-high, 32-wide block with input
// stride 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 16;
  const size_t width = 32;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1743
// 16x16 reuse-switch SSE2 kernel: block height 16 with block widths 17..31;
// input stride equals the width, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t out_stride = 32;
  const size_t height = 16;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1757
// 16x16 reuse-switch SSE2 kernel: block height 32 with block widths 17..31;
// input stride equals the width, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t out_stride = 32;
  const size_t height = 32;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1771
// 16x16 reuse-switch SSE2 kernel: one 32-high, 16-wide block with input
// stride 16 and output stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 16;
  const size_t out_stride = 52;
  const size_t width = 16;
  const size_t height = 32;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1783
// 16x16 reuse-switch SSE2 kernel: block heights 17..31 with input stride 33;
// the output stride tracks the height. Note that the block width used is 19,
// not the 16 that appears in the test name.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_17_32_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 33;
  const size_t width = 19;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1797
// 16x16 reuse-switch SSE2 kernel: block heights 17..31 at a fixed block width
// of 32; the output stride tracks the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_17_32_bw_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 32;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
  }
}
1811
// 16x16 reuse-switch SSE2 kernel: block heights 17..31 crossed with block
// widths 17..31; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
    }
  }
}
1827
// 16x16 reuse-switch SSE2 kernel: one 16-high, 16-wide block with input
// stride 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 16;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1839
// 16x16 reuse-switch SSE2 kernel: one 16-high, 16-wide block with input
// stride 16 and output stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 16;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1851
// 16x16 reuse-switch SSE2 kernel: one 16-high, 16-wide block with input
// stride 32 and output stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 32;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1863
// 16x16 reuse-switch SSE2 kernel: 272-high, 304-wide block with strides equal
// to the block dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 304;
  const size_t height = 272;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1876
// 16x16 reuse-switch SSE2 kernel: 48-high, 80-wide block with strides equal
// to the block dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 80;
  const size_t height = 48;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1889
// 16x16 reuse-switch SSE2 kernel: 112-high, 368-wide block, input stride 373,
// output stride 118, input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_SSE2_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_X86_SSE2;
  const size_t in_stride = 373;
  const size_t out_stride = 118;
  const size_t width = 368;
  const size_t height = 112;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2);
}
1903 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1904
1905
1906 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 16x16 reuse-mov WAsm SIMD kernel: one 16-high, 16-wide block with input and
// output strides of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16) {
  const size_t in_stride = 32;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
1917
// 16x16 reuse-mov WAsm SIMD kernel: every block height 1..32 crossed with every
// block width 1..32; input stride is 3x the width, output stride 7x the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_1_32_bw_1_32) {
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3)
          .output_stride(height * 7)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
    }
  }
}
1932
// 16x16 reuse-mov WAsm SIMD kernel: one 16-high, 32-wide block with input
// stride 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_32) {
  const size_t in_stride = 32;
  const size_t out_stride = 16;
  const size_t width = 32;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
1943
// 16x16 reuse-mov WAsm SIMD kernel: block height 16 with block widths 17..31;
// input stride equals the width, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_17_32) {
  const size_t out_stride = 32;
  const size_t height = 16;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
1956
// 16x16 reuse-mov WAsm SIMD kernel: block height 32 with block widths 17..31;
// input stride equals the width, output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_32_bw_17_32) {
  const size_t out_stride = 32;
  const size_t height = 32;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(out_stride)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
1969
// 16x16 reuse-mov WAsm SIMD kernel: one 32-high, 16-wide block with input
// stride 16 and output stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_32_bw_16) {
  const size_t in_stride = 16;
  const size_t out_stride = 52;
  const size_t width = 16;
  const size_t height = 32;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
1980
// 16x16 reuse-mov WAsm SIMD kernel: block heights 17..31 with input stride 33;
// the output stride tracks the height. Note that the block width used is 19,
// not the 16 that appears in the test name.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_17_32_bw_16) {
  const size_t in_stride = 33;
  const size_t width = 19;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(in_stride)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
1993
// 16x16 reuse-mov WAsm SIMD kernel: block heights 17..31 at a fixed block
// width of 32; the output stride tracks the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_17_32_bw_32) {
  const size_t width = 32;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
  }
}
2006
// 16x16 reuse-mov WAsm SIMD kernel: block heights 17..31 crossed with block
// widths 17..31; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_17_32_bw_17_32) {
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
    }
  }
}
2021
// 16x16 reuse-mov WAsm SIMD kernel: one 16-high, 16-wide block with input
// stride 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16_is_32) {
  const size_t in_stride = 32;
  const size_t out_stride = 16;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2032
// 16x16 reuse-mov WAsm SIMD kernel: one 16-high, 16-wide block with input
// stride 16 and output stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16_os_32) {
  const size_t in_stride = 16;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2043
// 16x16 reuse-mov WAsm SIMD kernel: one 16-high, 16-wide block with input
// stride 32 and output stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_16_bw_16_is_32_os_32) {
  const size_t in_stride = 32;
  const size_t out_stride = 32;
  const size_t width = 16;
  const size_t height = 16;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2054
// 16x16 reuse-mov WAsm SIMD kernel: 272-high, 304-wide block with strides
// equal to the block dimensions and an input element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_272_bw_304_ies_12) {
  const size_t width = 304;
  const size_t height = 272;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2066
// 16x16 reuse-mov WAsm SIMD kernel: 48-high, 80-wide block with strides equal
// to the block dimensions and an output element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_48_bw_80_oes_12) {
  const size_t width = 80;
  const size_t height = 48;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2078
// 16x16 reuse-mov WAsm SIMD kernel: 112-high, 368-wide block, input stride 373,
// output stride 118, input element stride 18 and output element stride 14.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_WASMSIMD_1, bh_112_bw_368_ies_18_oes_14) {
  const size_t in_stride = 373;
  const size_t out_stride = 118;
  const size_t width = 368;
  const size_t height = 112;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd);
}
2091 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2092
2093
2094 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 16x16 block with input and output strides of 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16) {
  TransposeMicrokernelTester()
      .input_stride(32)
      .output_stride(32)
      .block_width(16)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2105
// Sweeps every height/width pair in 1..32 (inclusive). The input stride is
// three times the width and the output stride is seven times the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_1_32_bw_1_32) {
  for (size_t bh = 1; bh <= 32; ++bh) {
    for (size_t bw = 1; bw <= 32; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
    }
  }
}
2120
// Block of width 32 and height 16; input stride 32, output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_32) {
  TransposeMicrokernelTester()
      .input_stride(32)
      .output_stride(16)
      .block_width(32)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2131
// Height 16 with widths 17..31 (the bound 32 is exclusive). The input stride
// equals the width; the output stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_17_32) {
  for (size_t bw = 17; bw < 32; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(32)
        .block_width(bw)
        .block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2144
// Height 32 with widths 17..31 (the bound 32 is exclusive). The input stride
// equals the width; the output stride is fixed at 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_32_bw_17_32) {
  for (size_t bw = 17; bw < 32; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(32)
        .block_width(bw)
        .block_height(32)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2157
// Block of width 16 and height 32; input stride 16, output stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_32_bw_16) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(52)
      .block_width(16)
      .block_height(32)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2168
// Heights 17..31 (the bound 32 is exclusive) with a fixed width of 19 and an
// input stride of 33; the output stride equals the height.
// NOTE(review): the test name says bw_16 but the width used is 19 -- confirm
// against the generator before changing either.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_17_32_bw_16) {
  for (size_t bh = 17; bh < 32; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(33)
        .output_stride(bh)
        .block_width(19)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2181
// Heights 17..31 (the bound 32 is exclusive) with a fixed width of 32 and an
// input stride of 32; the output stride equals the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_17_32_bw_32) {
  for (size_t bh = 17; bh < 32; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(32)
        .output_stride(bh)
        .block_width(32)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
  }
}
2194
// Sweeps heights and widths 17..31 (the bound 32 is exclusive). The input
// stride equals the width and the output stride equals the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_17_32_bw_17_32) {
  for (size_t bh = 17; bh < 32; ++bh) {
    for (size_t bw = 17; bw < 32; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
    }
  }
}
2209
// 16x16 block with input stride 32 and output stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16_is_32) {
  TransposeMicrokernelTester()
      .input_stride(32)
      .output_stride(16)
      .block_width(16)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2220
// 16x16 block with input stride 16 and output stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16_os_32) {
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(32)
      .block_width(16)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2231
// 16x16 block with both input and output strides set to 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_16_bw_16_is_32_os_32) {
  TransposeMicrokernelTester()
      .input_stride(32)
      .output_stride(32)
      .block_width(16)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2242
// Block of width 304 and height 272 with an input element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_272_bw_304_ies_12) {
  TransposeMicrokernelTester()
      .input_stride(304)
      .output_stride(272)
      .block_width(304)
      .block_height(272)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2254
// Block of width 80 and height 48 with an output element stride of 12.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_48_bw_80_oes_12) {
  TransposeMicrokernelTester()
      .input_stride(80)
      .output_stride(48)
      .block_width(80)
      .block_height(48)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2266
// Block of width 368 and height 112 with input/output strides 373/118 and
// input/output element strides 18/14.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_WASMSIMD_1, bh_112_bw_368_ies_18_oes_14) {
  TransposeMicrokernelTester()
      .input_stride(373)
      .output_stride(118)
      .block_width(368)
      .block_height(112)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd);
}
2279 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2280
2281
2282 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 8x8 block with input and output strides of 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2294
// Sweeps every height/width pair in 1..16 (inclusive). The input stride is
// three times the width and the output stride is seven times the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
2310
// Block of width 16 and height 8; input stride 16, output stride 8.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2322
// Height 8 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2336
// Height 16 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2350
// Block of width 8 and height 16; input stride 8, output stride 28.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2362
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 11 and an
// input stride of 25; the output stride equals the height.
// NOTE(review): the test name says bw_8 but the width used is 11 -- confirm
// against the generator before changing either.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2376
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 16 and an
// input stride of 16; the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
  }
}
2390
// Sweeps heights and widths 9..15 (the bound 16 is exclusive). The input
// stride equals the width and the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
2406
// 8x8 block with input stride 16 and output stride 8.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2418
// 8x8 block with input stride 8 and output stride 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2430
// 8x8 block with both input and output strides set to 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2442
// Block of width 152 and height 136 with an input element stride of 12.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2455
// Block of width 40 and height 24 with an output element stride of 12.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2468
// Block of width 184 and height 56 with input/output strides 189/62 and
// input/output element strides 18/14.
TEST(X8_TRANSPOSEC__8X8_MULTI_DEC_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon);
}
2482 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2483
2484
2485 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 8x8 block with input and output strides of 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2497
// Sweeps every height/width pair in 1..16 (inclusive). The input stride is
// three times the width and the output stride is seven times the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
2513
// Block of width 16 and height 8; input stride 16, output stride 8.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2525
// Height 8 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2539
// Height 16 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2553
// Block of width 8 and height 16; input stride 8, output stride 28.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2565
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 11 and an
// input stride of 25; the output stride equals the height.
// NOTE(review): the test name says bw_8 but the width used is 11 -- confirm
// against the generator before changing either.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2579
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 16 and an
// input stride of 16; the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
  }
}
2593
// Sweeps heights and widths 9..15 (the bound 16 is exclusive). The input
// stride equals the width and the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
2609
// 8x8 block with input stride 16 and output stride 8.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2621
// 8x8 block with input stride 8 and output stride 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2633
// 8x8 block with both input and output strides set to 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2645
// Block of width 152 and height 136 with an input element stride of 12.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2658
// Block of width 40 and height 24 with an output element stride of 12.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2671
// Block of width 184 and height 56 with input/output strides 189/62 and
// input/output element strides 18/14.
TEST(X8_TRANSPOSEC__8X8_MULTI_MOV_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon);
}
2685 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2686
2687
2688 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 8x8 block with input and output strides of 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2700
// Sweeps every height/width pair in 1..16 (inclusive). The input stride is
// three times the width and the output stride is seven times the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
2716
// Block of width 16 and height 8; input stride 16, output stride 8.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2728
// Height 8 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2742
// Height 16 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2756
// Block of width 8 and height 16; input stride 8, output stride 28.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2768
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 11 and an
// input stride of 25; the output stride equals the height.
// NOTE(review): the test name says bw_8 but the width used is 11 -- confirm
// against the generator before changing either.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2782
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 16 and an
// input stride of 16; the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
  }
}
2796
// Sweeps heights and widths 9..15 (the bound 16 is exclusive). The input
// stride equals the width and the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
2812
// 8x8 block with input stride 16 and output stride 8.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2824
// 8x8 block with input stride 8 and output stride 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2836
// 8x8 block with both input and output strides set to 16.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2848
// Block of width 152 and height 136 with an input element stride of 12.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2861
// Block of width 40 and height 24 with an output element stride of 12.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40)
      .output_stride(24)
      .block_width(40)
      .block_height(24)
      .element_size(1)
      .output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2874
// Block of width 184 and height 56 with input/output strides 189/62 and
// input/output element strides 18/14.
TEST(X8_TRANSPOSEC__8X8_MULTI_SWITCH_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189)
      .output_stride(62)
      .block_width(184)
      .block_height(56)
      .element_size(1)
      .input_element_stride(18)
      .output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon);
}
2888 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2889
2890
2891 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 8x8 block with input and output strides of 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
2903
// Sweeps every height/width pair in 1..16 (inclusive). The input stride is
// three times the width and the output stride is seven times the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw * 3)
          .output_stride(bh * 7)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
2919
// Block of width 16 and height 8; input stride 16, output stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(16)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
2931
// Height 8 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(8)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2945
// Height 16 with widths 9..15 (the bound 16 is exclusive). The input stride
// equals the width; the output stride is fixed at 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(16)
        .block_width(bw)
        .block_height(16)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2959
// Block of width 8 and height 16; input stride 8, output stride 28.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(28)
      .block_width(8)
      .block_height(16)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
2971
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 11 and an
// input stride of 25; the output stride equals the height.
// NOTE(review): the test name says bw_8 but the width used is 11 -- confirm
// against the generator before changing either.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(25)
        .output_stride(bh)
        .block_width(11)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2985
// Heights 9..15 (the bound 16 is exclusive) with a fixed width of 16 and an
// input stride of 16; the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16)
        .output_stride(bh)
        .block_width(16)
        .block_height(bh)
        .element_size(1)
        .iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2999
// Sweeps heights and widths 9..15 (the bound 16 is exclusive). The input
// stride equals the width and the output stride equals the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .element_size(1)
          .iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
3015
// 8x8 block with input stride 16 and output stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(8)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3027
// 8x8 block with input stride 8 and output stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3039
// 8x8 block with both input and output strides set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16)
      .output_stride(16)
      .block_width(8)
      .block_height(8)
      .element_size(1)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3051
// Block of width 152 and height 136 with an input element stride of 12.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152)
      .output_stride(136)
      .block_width(152)
      .block_height(136)
      .element_size(1)
      .input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3064
// 24-high by 40-wide block with output_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3077
// 56-high by 184-wide block with input_stride 189, output_stride 62,
// input_element_stride 18 and output_element_stride 14.
TEST(X8_TRANSPOSEC__8X8_REUSE_DEC_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon);
}
3091 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3092
3093
3094 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block with input_stride 16 and output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3106
// Sweeps every block_height and block_width from 1 through 16; input_stride
// is 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
3122
// 8-high by 16-wide block with input_stride 16 and output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3134
// block_height fixed at 8; block_width (also used as input_stride) runs over
// 9..15 with output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3148
// block_height fixed at 16; block_width (also used as input_stride) runs over
// 9..15 with output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3162
// 16-high by 8-wide block with input_stride 8 and output_stride 28.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3174
// block_height (also used as output_stride) runs over 9..15 with
// input_stride 25.
// NOTE(review): the test name says bw_8 but block_width is 11 — presumably
// intentional in the generator; confirm before changing.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3188
// block_width fixed at 16 (input_stride 16); block_height (also used as
// output_stride) runs over 9..15.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
  }
}
3202
// Sweeps block_height and block_width independently over 9..15, using the
// width as input_stride and the height as output_stride.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
3218
// Single 8x8 block with input_stride 16 and output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3230
// Single 8x8 block with input_stride 8 and output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3242
// Single 8x8 block with both input_stride and output_stride set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3254
// 136-high by 152-wide block with input_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3267
// 24-high by 40-wide block with output_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3280
// 56-high by 184-wide block with input_stride 189, output_stride 62,
// input_element_stride 18 and output_element_stride 14.
TEST(X8_TRANSPOSEC__8X8_REUSE_MOV_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon);
}
3294 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3295
3296
3297 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block with input_stride 16 and output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3309
// Sweeps every block_height and block_width from 1 through 16; input_stride
// is 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
3325
// 8-high by 16-wide block with input_stride 16 and output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3337
// block_height fixed at 8; block_width (also used as input_stride) runs over
// 9..15 with output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3351
// block_height fixed at 16; block_width (also used as input_stride) runs over
// 9..15 with output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3365
// 16-high by 8-wide block with input_stride 8 and output_stride 28.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3377
// block_height (also used as output_stride) runs over 9..15 with
// input_stride 25.
// NOTE(review): the test name says bw_8 but block_width is 11 — presumably
// intentional in the generator; confirm before changing.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3391
// block_width fixed at 16 (input_stride 16); block_height (also used as
// output_stride) runs over 9..15.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
  }
}
3405
// Sweeps block_height and block_width independently over 9..15, using the
// width as input_stride and the height as output_stride.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
3421
// Single 8x8 block with input_stride 16 and output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3433
// Single 8x8 block with input_stride 8 and output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3445
// Single 8x8 block with both input_stride and output_stride set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3457
// 136-high by 152-wide block with input_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3470
// 24-high by 40-wide block with output_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3483
// 56-high by 184-wide block with input_stride 189, output_stride 62,
// input_element_stride 18 and output_element_stride 14.
TEST(X8_TRANSPOSEC__8X8_REUSE_MULTI_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon);
}
3497 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3498
3499
3500 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 8x8 block with input_stride 16 and output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3512
// Sweeps every block_height and block_width from 1 through 16; input_stride
// is 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
3528
// 8-high by 16-wide block with input_stride 16 and output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3540
// block_height fixed at 8; block_width (also used as input_stride) runs over
// 9..15 with output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(8)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3554
// block_height fixed at 16; block_width (also used as input_stride) runs over
// 9..15 with output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3568
// 16-high by 8-wide block with input_stride 8 and output_stride 28.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(28)
      .block_width(8).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3580
// block_height (also used as output_stride) runs over 9..15 with
// input_stride 25.
// NOTE(review): the test name says bw_8 but block_width is 11 — presumably
// intentional in the generator; confirm before changing.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(25).output_stride(height)
        .block_width(11).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3594
// block_width fixed at 16 (input_stride 16); block_height (also used as
// output_stride) runs over 9..15.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3608
// Sweeps block_height and block_width independently over 9..15, using the
// width as input_stride and the height as output_stride.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
3624
// Single 8x8 block with input_stride 16 and output_stride 8.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3636
// Single 8x8 block with input_stride 8 and output_stride 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3648
// Single 8x8 block with both input_stride and output_stride set to 16.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3660
// 136-high by 152-wide block with input_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_136_bw_152_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(152).output_stride(136)
      .block_width(152).block_height(136)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3673
// 24-high by 40-wide block with output_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_24_bw_40_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(40).output_stride(24)
      .block_width(40).block_height(24)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3686
// 56-high by 184-wide block with input_stride 189, output_stride 62,
// input_element_stride 18 and output_element_stride 14.
TEST(X8_TRANSPOSEC__8X8_REUSE_SWITCH_ZIP_NEON_1, bh_56_bw_184_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(189).output_stride(62)
      .block_width(184).block_height(56)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon);
}
3700 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3701
3702
3703 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 16x16 block with input_stride 32 and output_stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3715
// Sweeps every block_height and block_width from 1 through 32; input_stride
// is 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
    }
  }
}
3731
// 16-high by 32-wide block with input_stride 32 and output_stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(32).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3743
// block_height fixed at 16; block_width (also used as input_stride) runs over
// 17..31 with output_stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3757
// block_height fixed at 32; block_width (also used as input_stride) runs over
// 17..31 with output_stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(32)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3771
// 32-high by 16-wide block with input_stride 16 and output_stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(52)
      .block_width(16).block_height(32)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3783
// block_height (also used as output_stride) runs over 17..31 with
// input_stride 33.
// NOTE(review): the test name says bw_16 but block_width is 19 — presumably
// intentional in the generator; confirm before changing.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_17_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(33).output_stride(height)
        .block_width(19).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3797
// block_width fixed at 32 (input_stride 32); block_height (also used as
// output_stride) runs over 17..31.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_17_32_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(height)
        .block_width(32).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
  }
}
3811
// Sweeps block_height and block_width independently over 17..31, using the
// width as input_stride and the height as output_stride.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
    }
  }
}
3827
// Single 16x16 block with input_stride 32 and output_stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(16).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3839
// Single 16x16 block with input_stride 16 and output_stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3851
// Single 16x16 block with both input_stride and output_stride set to 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3863
// 272-high by 304-wide block with input_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(304).output_stride(272)
      .block_width(304).block_height(272)
      .element_size(1).input_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3876
// 48-high by 80-wide block with output_element_stride 12; strides match the
// block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(80).output_stride(48)
      .block_width(80).block_height(48)
      .element_size(1).output_element_stride(12)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3889
// 112-high by 368-wide block with input_stride 373, output_stride 118,
// input_element_stride 18 and output_element_stride 14.
TEST(X8_TRANSPOSEC__16X16_REUSE_DEC_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(373).output_stride(118)
      .block_width(368).block_height(112)
      .element_size(1)
      .input_element_stride(18).output_element_stride(14)
      .iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon);
}
3903 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3904
3905
3906 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 16x16 block with input_stride 32 and output_stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(32)
      .block_width(16).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
3918
// Sweeps every block_height and block_width from 1 through 32; input_stride
// is 3x the width and output_stride is 7x the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width * 3).output_stride(height * 7)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
    }
  }
}
3934
// 16-high by 32-wide block with input_stride 32 and output_stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(32).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
3946
// block_height fixed at 16; block_width (also used as input_stride) runs over
// 17..31 with output_stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(16)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
3960
// block_height fixed at 32; block_width (also used as input_stride) runs over
// 17..31 with output_stride 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(32)
        .block_width(width).block_height(32)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
3974
// 32-high by 16-wide block with input_stride 16 and output_stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(52)
      .block_width(16).block_height(32)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
3986
// block_height (also used as output_stride) runs over 17..31 with
// input_stride 33.
// NOTE(review): the test name says bw_16 but block_width is 19 — presumably
// intentional in the generator; confirm before changing.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_17_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(33).output_stride(height)
        .block_width(19).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
4000
// block_width fixed at 32 (input_stride 32); block_height (also used as
// output_stride) runs over 17..31.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_17_32_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(height)
        .block_width(32).block_height(height)
        .element_size(1).iterations(1)
        .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
  }
}
4014
// Sweeps block_height and block_width independently over 17..31, using the
// width as input_stride and the height as output_stride.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .element_size(1).iterations(1)
          .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
    }
  }
}
4030
// Single 16x16 block with input_stride 32 and output_stride 16.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(32).output_stride(16)
      .block_width(16).block_height(16)
      .element_size(1).iterations(1)
      .Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4042
// A 16x16 block whose output stride (32) is larger than the block height,
// while the input stride (16) matches the block width.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(16);
  tester.output_stride(32);
  tester.block_width(16);
  tester.block_height(16);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4054
// A 16x16 block where both the input stride and the output stride are 32,
// i.e. twice the corresponding block dimension.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(32);
  tester.output_stride(32);
  tester.block_width(16);
  tester.block_height(16);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4066
// Large block (height 272, width 304) with an input element stride of 12
// while the element size is 1; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(304);
  tester.output_stride(272);
  tester.block_width(304);
  tester.block_height(272);
  tester.element_size(1);
  tester.input_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4079
// Block of height 48 and width 80 with an output element stride of 12 while
// the element size is 1; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(80);
  tester.output_stride(48);
  tester.block_width(80);
  tester.block_height(48);
  tester.element_size(1);
  tester.output_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4092
// Block of height 112 and width 368 combining custom element strides on both
// sides (input 18, output 14) with strides slightly larger than the block
// dimensions (input stride 373, output stride 118).
TEST(X8_TRANSPOSEC__16X16_REUSE_MOV_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(373);
  tester.output_stride(118);
  tester.block_width(368);
  tester.block_height(112);
  tester.element_size(1);
  tester.input_element_stride(18);
  tester.output_element_stride(14);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon);
}
4106 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4107
4108
4109 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the reuse_switch_zip NEON kernel through the tester on a single 16x16
// block, with both the input stride and the output stride set to 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(32);
  tester.output_stride(32);
  tester.block_width(16);
  tester.block_height(16);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4121
// Exhaustive sweep over block heights 1..32 and block widths 1..32 (both
// bounds inclusive). The input stride is three times the width and the output
// stride is seven times the height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_1_32_bw_1_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 32; ++height) {
    for (size_t width = 1; width <= 32; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width * 3);
      tester.output_stride(height * 7);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
    }
  }
}
4137
// Single block of height 16 and width 32; the input stride (32) and output
// stride (16) match the block width and height respectively.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(32);
  tester.output_stride(16);
  tester.block_width(32);
  tester.block_height(16);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4149
// Sweeps block widths 17 through 31 (32 is excluded by the loop bound) at a
// fixed block height of 16; the input stride equals the current width and the
// output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width);
    tester.output_stride(32);
    tester.block_width(width);
    tester.block_height(16);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4163
// Sweeps block widths 17 through 31 at a fixed block height of 32; the input
// stride equals the current width and the output stride is 32.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 17; width < 32; ++width) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(width);
    tester.output_stride(32);
    tester.block_width(width);
    tester.block_height(32);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4177
// Single block of height 32 and width 16, with input stride 16 and output
// stride 52.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(16);
  tester.output_stride(52);
  tester.block_width(16);
  tester.block_height(32);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4189
// Sweeps block heights 17 through 31 (32 is excluded by the loop bound); the
// output stride tracks the height while the input stride stays at 33.
// NOTE(review): the test name says bw_16 but the block width used here is 19;
// confirm this is intentional in the generator template.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_17_32_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(33);
    tester.output_stride(height);
    tester.block_width(19);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4203
// Sweeps block heights 17 through 31 at a fixed block width of 32; the input
// stride is 32 and the output stride equals the current height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_17_32_bw_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    auto tester = TransposeMicrokernelTester();
    tester.input_stride(32);
    tester.output_stride(height);
    tester.block_width(32);
    tester.block_height(height);
    tester.element_size(1);
    tester.iterations(1);
    tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
  }
}
4217
// Covers every combination of block height and block width in 17 through 31.
// Each stride is set to the matching block dimension (input stride = width,
// output stride = height).
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_17_32_bw_17_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 17; height < 32; ++height) {
    for (size_t width = 17; width < 32; ++width) {
      auto tester = TransposeMicrokernelTester();
      tester.input_stride(width);
      tester.output_stride(height);
      tester.block_width(width);
      tester.block_height(height);
      tester.element_size(1);
      tester.iterations(1);
      tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
    }
  }
}
4233
// A 16x16 block whose input stride (32) is larger than the block width, while
// the output stride (16) matches the block height.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16_is_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(32);
  tester.output_stride(16);
  tester.block_width(16);
  tester.block_height(16);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4245
// A 16x16 block whose output stride (32) is larger than the block height,
// while the input stride (16) matches the block width.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16_os_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(16);
  tester.output_stride(32);
  tester.block_width(16);
  tester.block_height(16);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4257
// A 16x16 block where both the input stride and the output stride are 32,
// i.e. twice the corresponding block dimension.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_16_bw_16_is_32_os_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(32);
  tester.output_stride(32);
  tester.block_width(16);
  tester.block_height(16);
  tester.element_size(1);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4269
// Large block (height 272, width 304) with an input element stride of 12
// while the element size is 1; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_272_bw_304_ies_12) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(304);
  tester.output_stride(272);
  tester.block_width(304);
  tester.block_height(272);
  tester.element_size(1);
  tester.input_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4282
// Block of height 48 and width 80 with an output element stride of 12 while
// the element size is 1; strides equal the block dimensions.
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_48_bw_80_oes_12) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(80);
  tester.output_stride(48);
  tester.block_width(80);
  tester.block_height(48);
  tester.element_size(1);
  tester.output_element_stride(12);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4295
// Block of height 112 and width 368 combining custom element strides on both
// sides (input 18, output 14) with strides slightly larger than the block
// dimensions (input stride 373, output stride 118).
TEST(X8_TRANSPOSEC__16X16_REUSE_SWITCH_ZIP_NEON_1, bh_112_bw_368_ies_18_oes_14) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = TransposeMicrokernelTester();
  tester.input_stride(373);
  tester.output_stride(118);
  tester.block_width(368);
  tester.block_height(112);
  tester.element_size(1);
  tester.input_element_stride(18);
  tester.output_element_stride(14);
  tester.iterations(1);
  tester.Test(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon);
}
4309 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4310