1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <gtest/gtest.h>
7
8 #include <xnnpack/params.h>
9
10 #include "deconvolution-operator-tester.h"
11
12
13 constexpr size_t kUnstridedInputHeight = 8;
14 constexpr size_t kUnstridedInputWidth = 7;
15 constexpr size_t kStridedInputHeight = 6;
16 constexpr size_t kStridedInputWidth = 5;
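// Test input sizes: the kUnstrided* sizes are used by the unit-stride tests in this
// section; the kStrided* sizes are presumably used by the strided-deconvolution tests
// later in the file.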
17
18
19 /**************************** Future GEMM path ****************************/
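// With a 1x1 kernel and unit stride, deconvolution degenerates to a per-pixel matrix
// multiplication, so these shapes could in principle be served by a plain GEMM kernel
// (hence "future GEMM path"); the tests below cover that shape regardless of which
// code path currently executes it.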
20
21 TEST(DECONVOLUTION_NHWC_QS8, 1x1) {
22 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
23 DeconvolutionOperatorTester()
24 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
25 .kernel_size(1, 1)
26 .group_input_channels(23)
27 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
28 .iterations(3)
29 .TestQS8();
30 }
31
32 TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_input_width) {
33 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
34 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
35 DeconvolutionOperatorTester()
36 .input_size(input_height, kUnstridedInputWidth)
37 .kernel_size(1, 1)
38 .group_input_channels(23)
39 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
40 .iterations(1)
41 .TestQS8();
42 }
43 }
44
45 TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_input_height) {
46 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
47 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
48 DeconvolutionOperatorTester()
49 .input_size(kUnstridedInputHeight, input_width)
50 .kernel_size(1, 1)
51 .group_input_channels(23)
52 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
53 .iterations(1)
54 .TestQS8();
55 }
56 }
57
58 TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_input_channels) {
59 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
60 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
61 DeconvolutionOperatorTester()
62 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
63 .kernel_size(1, 1)
64 .group_input_channels(input_channels)
65 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
66 .iterations(1)
67 .TestQS8();
68 }
69 }
70
71 TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_output_channels) {
72 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
73 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
74 DeconvolutionOperatorTester()
75 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
76 .kernel_size(1, 1)
77 .group_input_channels(23)
78 .group_output_channels(output_channels)
79 .iterations(1)
80 .TestQS8();
81 }
82 }
83
84 TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_input_stride) {
85 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
86 DeconvolutionOperatorTester()
87 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
88 .kernel_size(1, 1)
89 .group_input_channels(23)
90 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
91 .input_pixel_stride(28)
92 .iterations(3)
93 .TestQS8();
94 }
95
96 TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_output_stride) {
97 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
98 DeconvolutionOperatorTester()
99 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
100 .kernel_size(1, 1)
101 .group_input_channels(23)
102 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
103 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
104 .iterations(3)
105 .TestQS8();
106 }
107
108 TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_qmin) {
109 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
110 DeconvolutionOperatorTester()
111 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
112 .kernel_size(1, 1)
113 .group_input_channels(23)
114 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
115 .qmin(128)
116 .iterations(3)
117 .TestQS8();
118 }
119
120 TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_qmax) {
121 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
122 DeconvolutionOperatorTester()
123 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
124 .kernel_size(1, 1)
125 .group_input_channels(23)
126 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
127 .qmax(128)
128 .iterations(3)
129 .TestQS8();
130 }
131
132 TEST(DECONVOLUTION_NHWC_QS8, 1x1_without_bias) {
133 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
134 DeconvolutionOperatorTester()
135 .has_bias(false)
136 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
137 .kernel_size(1, 1)
138 .group_input_channels(23)
139 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
140 .iterations(3)
141 .TestQS8();
142 }
143
144 /**************************** Future GEMM path, grouped ****************************/
145
146 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1) {
147 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
148 DeconvolutionOperatorTester()
149 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
150 .kernel_size(1, 1)
151 .groups(2)
152 .group_input_channels(23)
153 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
154 .iterations(3)
155 .TestQS8();
156 }
157
158 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_input_width) {
159 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
160 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
161 DeconvolutionOperatorTester()
162 .input_size(input_height, kUnstridedInputWidth)
163 .kernel_size(1, 1)
164 .groups(2)
165 .group_input_channels(23)
166 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
167 .iterations(1)
168 .TestQS8();
169 }
170 }
171
172 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_input_height) {
173 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
174 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
175 DeconvolutionOperatorTester()
176 .input_size(kUnstridedInputHeight, input_width)
177 .kernel_size(1, 1)
178 .groups(2)
179 .group_input_channels(23)
180 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
181 .iterations(1)
182 .TestQS8();
183 }
184 }
185
186 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_input_channels) {
187 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
188 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
189 DeconvolutionOperatorTester()
190 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
191 .kernel_size(1, 1)
192 .groups(2)
193 .group_input_channels(input_channels)
194 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
195 .iterations(1)
196 .TestQS8();
197 }
198 }
199
200 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_output_channels) {
201 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
202 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
203 DeconvolutionOperatorTester()
204 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
205 .kernel_size(1, 1)
206 .groups(2)
207 .group_input_channels(23)
208 .group_output_channels(output_channels)
209 .iterations(1)
210 .TestQS8();
211 }
212 }
213
214 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_input_stride) {
215 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
216 DeconvolutionOperatorTester()
217 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
218 .kernel_size(1, 1)
219 .groups(2)
220 .group_input_channels(23)
221 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
222 .input_pixel_stride(47)
223 .iterations(3)
224 .TestQS8();
225 }
226
227 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_output_stride) {
228 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
229 DeconvolutionOperatorTester()
230 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
231 .kernel_size(1, 1)
232 .groups(2)
233 .group_input_channels(23)
234 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
235 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
236 .iterations(3)
237 .TestQS8();
238 }
239
240 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_qmin) {
241 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
242 DeconvolutionOperatorTester()
243 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
244 .kernel_size(1, 1)
245 .groups(2)
246 .group_input_channels(23)
247 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
248 .qmin(128)
249 .iterations(3)
250 .TestQS8();
251 }
252
253 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_qmax) {
254 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
255 DeconvolutionOperatorTester()
256 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
257 .kernel_size(1, 1)
258 .groups(2)
259 .group_input_channels(23)
260 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
261 .qmax(128)
262 .iterations(3)
263 .TestQS8();
264 }
265
266 TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_without_bias) {
267 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
268 DeconvolutionOperatorTester()
269 .has_bias(false)
270 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
271 .kernel_size(1, 1)
272 .groups(2)
273 .group_input_channels(23)
274 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
275 .iterations(3)
276 .TestQS8();
277 }
278
279 /**************************** Future GEMM path, batched ****************************/
280
281 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1) {
282 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
283 DeconvolutionOperatorTester()
284 .batch_size(2)
285 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
286 .kernel_size(1, 1)
287 .group_input_channels(23)
288 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
289 .iterations(3)
290 .TestQS8();
291 }
292
293 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_input_width) {
294 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
295 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
296 DeconvolutionOperatorTester()
297 .batch_size(2)
298 .input_size(input_height, kUnstridedInputWidth)
299 .kernel_size(1, 1)
300 .group_input_channels(23)
301 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
302 .iterations(1)
303 .TestQS8();
304 }
305 }
306
307 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_input_height) {
308 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
309 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
310 DeconvolutionOperatorTester()
311 .batch_size(2)
312 .input_size(kUnstridedInputHeight, input_width)
313 .kernel_size(1, 1)
314 .group_input_channels(23)
315 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
316 .iterations(1)
317 .TestQS8();
318 }
319 }
320
321 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_input_channels) {
322 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
323 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
324 DeconvolutionOperatorTester()
325 .batch_size(2)
326 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
327 .kernel_size(1, 1)
328 .group_input_channels(input_channels)
329 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
330 .iterations(1)
331 .TestQS8();
332 }
333 }
334
335 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_output_channels) {
336 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
337 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
338 DeconvolutionOperatorTester()
339 .batch_size(2)
340 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
341 .kernel_size(1, 1)
342 .group_input_channels(23)
343 .group_output_channels(output_channels)
344 .iterations(1)
345 .TestQS8();
346 }
347 }
348
349 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_input_stride) {
350 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
351 DeconvolutionOperatorTester()
352 .batch_size(2)
353 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
354 .kernel_size(1, 1)
355 .group_input_channels(23)
356 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
357 .input_pixel_stride(28)
358 .iterations(3)
359 .TestQS8();
360 }
361
362 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_output_stride) {
363 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
364 DeconvolutionOperatorTester()
365 .batch_size(2)
366 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
367 .kernel_size(1, 1)
368 .group_input_channels(23)
369 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
370 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
371 .iterations(3)
372 .TestQS8();
373 }
374
375 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_qmin) {
376 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
377 DeconvolutionOperatorTester()
378 .batch_size(2)
379 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
380 .kernel_size(1, 1)
381 .group_input_channels(23)
382 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
383 .qmin(128)
384 .iterations(3)
385 .TestQS8();
386 }
387
388 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_qmax) {
389 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
390 DeconvolutionOperatorTester()
391 .batch_size(2)
392 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
393 .kernel_size(1, 1)
394 .group_input_channels(23)
395 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
396 .qmax(128)
397 .iterations(3)
398 .TestQS8();
399 }
400
401 TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_without_bias) {
402 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
403 DeconvolutionOperatorTester()
404 .has_bias(false)
405 .batch_size(2)
406 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
407 .kernel_size(1, 1)
408 .group_input_channels(23)
409 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
410 .iterations(3)
411 .TestQS8();
412 }
413
414 /**************************** Future GEMM path, batched, grouped ****************************/
415
416 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1) {
417 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
418 DeconvolutionOperatorTester()
419 .batch_size(2)
420 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
421 .kernel_size(1, 1)
422 .groups(2)
423 .group_input_channels(23)
424 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
425 .iterations(3)
426 .TestQS8();
427 }
428
429 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_input_width) {
430 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
431 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
432 DeconvolutionOperatorTester()
433 .batch_size(2)
434 .input_size(input_height, kUnstridedInputWidth)
435 .kernel_size(1, 1)
436 .groups(2)
437 .group_input_channels(23)
438 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
439 .iterations(1)
440 .TestQS8();
441 }
442 }
443
444 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_input_height) {
445 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
446 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
447 DeconvolutionOperatorTester()
448 .batch_size(2)
449 .input_size(kUnstridedInputHeight, input_width)
450 .kernel_size(1, 1)
451 .groups(2)
452 .group_input_channels(23)
453 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
454 .iterations(1)
455 .TestQS8();
456 }
457 }
458
459 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_input_channels) {
460 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
461 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
462 DeconvolutionOperatorTester()
463 .batch_size(2)
464 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
465 .kernel_size(1, 1)
466 .groups(2)
467 .group_input_channels(input_channels)
468 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
469 .iterations(1)
470 .TestQS8();
471 }
472 }
473
474 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_output_channels) {
475 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
476 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
477 DeconvolutionOperatorTester()
478 .batch_size(2)
479 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
480 .kernel_size(1, 1)
481 .groups(2)
482 .group_input_channels(23)
483 .group_output_channels(output_channels)
484 .iterations(1)
485 .TestQS8();
486 }
487 }
488
489 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_input_stride) {
490 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
491 DeconvolutionOperatorTester()
492 .batch_size(2)
493 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
494 .kernel_size(1, 1)
495 .groups(2)
496 .group_input_channels(23)
497 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
498 .input_pixel_stride(47)
499 .iterations(3)
500 .TestQS8();
501 }
502
503 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_output_stride) {
504 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
505 DeconvolutionOperatorTester()
506 .batch_size(2)
507 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
508 .kernel_size(1, 1)
509 .groups(2)
510 .group_input_channels(23)
511 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
512 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
513 .iterations(3)
514 .TestQS8();
515 }
516
517 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_qmin) {
518 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
519 DeconvolutionOperatorTester()
520 .batch_size(2)
521 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
522 .kernel_size(1, 1)
523 .groups(2)
524 .group_input_channels(23)
525 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
526 .qmin(128)
527 .iterations(3)
528 .TestQS8();
529 }
530
531 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_qmax) {
532 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
533 DeconvolutionOperatorTester()
534 .batch_size(2)
535 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
536 .kernel_size(1, 1)
537 .groups(2)
538 .group_input_channels(23)
539 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
540 .qmax(128)
541 .iterations(3)
542 .TestQS8();
543 }
544
545 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_without_bias) {
546 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
547 DeconvolutionOperatorTester()
548 .has_bias(false)
549 .batch_size(2)
550 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
551 .kernel_size(1, 1)
552 .groups(2)
553 .group_input_channels(23)
554 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
555 .iterations(3)
556 .TestQS8();
557 }
558
559 /**************************** CONV path ****************************/
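// Unit-stride 3x3 kernels exercise the regular convolution (CONV) path. Note that with
// kernel_size(3, 3), padding(1), and unit stride the deconvolution output matches the
// input size: out = (in - 1) + (3 - 1) + 1 - 2 * 1 = in.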
560
561 TEST(DECONVOLUTION_NHWC_QS8, 3x3) {
562 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
563 DeconvolutionOperatorTester()
564 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
565 .padding(1)
566 .kernel_size(3, 3)
567 .group_input_channels(15)
568 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
569 .iterations(3)
570 .TestQS8();
571 }
572
573 TEST(DECONVOLUTION_NHWC_QS8, Kx3) {
574 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
575 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
576 DeconvolutionOperatorTester()
577 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
578 .padding_width(1)
579 .kernel_size(kernel_height, 3)
580 .group_input_channels(17)
581 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
582 .iterations(3)
583 .TestQS8();
584 }
585 }
586
587 TEST(DECONVOLUTION_NHWC_QS8, 3xK) {
588 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
589 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
590 DeconvolutionOperatorTester()
591 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
592 .padding_height(1)
593 .kernel_size(3, kernel_width)
594 .group_input_channels(17)
595 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
596 .iterations(3)
597 .TestQS8();
598 }
599 }
600
601 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_height_padding) {
602 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
603 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
604 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
605 DeconvolutionOperatorTester()
606 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
607 .padding_width(1)
608 .padding_top(padding_top)
609 .padding_bottom(padding_bottom)
610 .kernel_size(3, 3)
611 .group_input_channels(15)
612 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
613 .iterations(1)
614 .TestQS8();
615 }
616 }
617 }
618
619 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_width_padding) {
620 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
621 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
622 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
623 DeconvolutionOperatorTester()
624 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
625 .padding_height(1)
626 .padding_left(padding_left)
627 .padding_right(padding_right)
628 .kernel_size(3, 3)
629 .group_input_channels(15)
630 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
631 .iterations(1)
632 .TestQS8();
633 }
634 }
635 }
636
637 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_height_adjustment) {
638 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
639 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
640 DeconvolutionOperatorTester()
641 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
642 .padding(1)
643 .stride_height(adjustment_height + 1)
644 .adjustment_height(adjustment_height)
645 .kernel_size(3, 3)
646 .group_input_channels(15)
647 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
648 .iterations(1)
649 .TestQS8();
650 }
651 }
652
653 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_width_adjustment) {
654 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
655 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
656 DeconvolutionOperatorTester()
657 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
658 .padding(1)
659 .stride_width(adjustment_width + 1)
660 .adjustment_width(adjustment_width)
661 .kernel_size(3, 3)
662 .group_input_channels(15)
663 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
664 .iterations(1)
665 .TestQS8();
666 }
667 }
668
669 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_input_height) {
670 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
671 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
672 DeconvolutionOperatorTester()
673 .input_size(input_height, kUnstridedInputWidth)
674 .padding(1)
675 .kernel_size(3, 3)
676 .group_input_channels(15)
677 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
678 .iterations(1)
679 .TestQS8();
680 }
681 }
682
683 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_input_width) {
684 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
685 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
686 DeconvolutionOperatorTester()
687 .input_size(kUnstridedInputHeight, input_width)
688 .padding(1)
689 .kernel_size(3, 3)
690 .group_input_channels(15)
691 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
692 .iterations(1)
693 .TestQS8();
694 }
695 }
696
697 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_input_channels) {
698 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
699 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
700 DeconvolutionOperatorTester()
701 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
702 .padding(1)
703 .kernel_size(3, 3)
704 .group_input_channels(input_channels)
705 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
706 .iterations(1)
707 .TestQS8();
708 }
709 }
710
711 TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_output_channels) {
712 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
713 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
714 DeconvolutionOperatorTester()
715 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
716 .padding(1)
717 .kernel_size(3, 3)
718 .group_input_channels(23)
719 .group_output_channels(output_channels)
720 .iterations(1)
721 .TestQS8();
722 }
723 }
724
725 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_height_dilation) {
726 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
727 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
728 DeconvolutionOperatorTester()
729 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
730 .padding(1)
731 .kernel_size(3, 3)
732 .dilation_height(dilation_height)
733 .group_input_channels(23)
734 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
735 .iterations(3)
736 .TestQS8();
737 }
738 }
739
740 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_width_dilation) {
741 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
742 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
743 DeconvolutionOperatorTester()
744 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
745 .padding(1)
746 .kernel_size(3, 3)
747 .dilation_width(dilation_width)
748 .group_input_channels(23)
749 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
750 .iterations(3)
751 .TestQS8();
752 }
753 }
754
755 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_height_dilation_and_stride) {
756 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
757 DeconvolutionOperatorTester()
758 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
759 .padding(1)
760 .kernel_size(3, 3)
761 .dilation_height(3)
762 .stride_height(2)
763 .group_input_channels(23)
764 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
765 .iterations(3)
766 .TestQS8();
767 }
768
769 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_width_dilation_and_stride) {
770 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
771 DeconvolutionOperatorTester()
772 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
773 .padding(1)
774 .kernel_size(3, 3)
775 .dilation_width(3)
776 .stride_width(2)
777 .group_input_channels(23)
778 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
779 .iterations(3)
780 .TestQS8();
781 }
782
783 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_input_stride) {
784 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
785 DeconvolutionOperatorTester()
786 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
787 .padding(1)
788 .kernel_size(3, 3)
789 .group_input_channels(23)
790 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
791 .input_pixel_stride(28)
792 .iterations(3)
793 .TestQS8();
794 }
795
796 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_output_stride) {
797 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
798 DeconvolutionOperatorTester()
799 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
800 .padding(1)
801 .kernel_size(3, 3)
802 .group_input_channels(23)
803 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
804 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
805 .iterations(3)
806 .TestQS8();
807 }
808
809 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_qmin) {
810 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
811 DeconvolutionOperatorTester()
812 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
813 .padding(1)
814 .kernel_size(3, 3)
815 .group_input_channels(23)
816 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
817 .qmin(128)
818 .iterations(3)
819 .TestQS8();
820 }
821
822 TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_qmax) {
823 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
824 DeconvolutionOperatorTester()
825 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
826 .padding(1)
827 .kernel_size(3, 3)
828 .group_input_channels(23)
829 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
830 .qmax(128)
831 .iterations(3)
832 .TestQS8();
833 }
834
835 TEST(DECONVOLUTION_NHWC_QS8, 3x3_without_bias) {
836 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
837 DeconvolutionOperatorTester()
838 .has_bias(false)
839 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
840 .padding(1)
841 .kernel_size(3, 3)
842 .group_input_channels(23)
843 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
844 .iterations(3)
845 .TestQS8();
846 }
847
848 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_3x3) {
849 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
850 DeconvolutionOperatorTester()
851 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
852 .padding(1)
853 .kernel_size(3, 3)
854 .group_input_channels(15)
855 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
856 .use_weights_cache(true)
857 .iterations(3)
858 .TestQS8();
859 }
860
861 /**************************** CONV path, grouped ****************************/
862
863 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3) {
864 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
865 DeconvolutionOperatorTester()
866 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
867 .padding(1)
868 .kernel_size(3, 3)
869 .groups(2)
870 .group_input_channels(15)
871 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
872 .iterations(3)
873 .TestQS8();
874 }
875
876 TEST(DECONVOLUTION_NHWC_QS8, grouped_Kx3) {
877 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
878 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
879 DeconvolutionOperatorTester()
880 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
881 .padding_width(1)
882 .kernel_size(kernel_height, 3)
883 .groups(2)
884 .group_input_channels(17)
885 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
886 .iterations(3)
887 .TestQS8();
888 }
889 }
890
891 TEST(DECONVOLUTION_NHWC_QS8, grouped_3xK) {
892 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
893 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
894 DeconvolutionOperatorTester()
895 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
896 .padding_height(1)
897 .kernel_size(3, kernel_width)
898 .groups(2)
899 .group_input_channels(17)
900 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
901 .iterations(3)
902 .TestQS8();
903 }
904 }
905
906 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_height_padding) {
907 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
908 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
909 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
910 DeconvolutionOperatorTester()
911 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
912 .padding_width(1)
913 .padding_top(padding_top)
914 .padding_bottom(padding_bottom)
915 .kernel_size(3, 3)
916 .groups(2)
917 .group_input_channels(15)
918 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
919 .iterations(1)
920 .TestQS8();
921 }
922 }
923 }
924
925 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_width_padding) {
926 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
927 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
928 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
929 DeconvolutionOperatorTester()
930 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
931 .padding_height(1)
932 .padding_left(padding_left)
933 .padding_right(padding_right)
934 .kernel_size(3, 3)
935 .groups(2)
936 .group_input_channels(15)
937 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
938 .iterations(1)
939 .TestQS8();
940 }
941 }
942 }
943
944 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_height_adjustment) {
945 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
946 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
947 DeconvolutionOperatorTester()
948 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
949 .padding(1)
950 .stride_height(adjustment_height + 1)
951 .adjustment_height(adjustment_height)
952 .kernel_size(3, 3)
953 .groups(2)
954 .group_input_channels(15)
955 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
956 .iterations(1)
957 .TestQS8();
958 }
959 }
960
961 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_width_adjustment) {
962 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
963 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
964 DeconvolutionOperatorTester()
965 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
966 .padding(1)
967 .stride_width(adjustment_width + 1)
968 .adjustment_width(adjustment_width)
969 .kernel_size(3, 3)
970 .groups(2)
971 .group_input_channels(15)
972 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
973 .iterations(1)
974 .TestQS8();
975 }
976 }
977
978 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_input_height) {
979 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
980 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
981 DeconvolutionOperatorTester()
982 .input_size(input_height, kUnstridedInputWidth)
983 .padding(1)
984 .kernel_size(3, 3)
985 .groups(2)
986 .group_input_channels(15)
987 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
988 .iterations(1)
989 .TestQS8();
990 }
991 }
992
993 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_input_width) {
994 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
995 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
996 DeconvolutionOperatorTester()
997 .input_size(kUnstridedInputHeight, input_width)
998 .padding(1)
999 .kernel_size(3, 3)
1000 .groups(2)
1001 .group_input_channels(15)
1002 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1003 .iterations(1)
1004 .TestQS8();
1005 }
1006 }
1007
1008 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_input_channels) {
1009 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1010 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
1011 DeconvolutionOperatorTester()
1012 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1013 .padding(1)
1014 .kernel_size(3, 3)
1015 .groups(2)
1016 .group_input_channels(input_channels)
1017 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1018 .iterations(1)
1019 .TestQS8();
1020 }
1021 }
1022
1023 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_output_channels) {
1024 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1025 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
1026 DeconvolutionOperatorTester()
1027 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1028 .padding(1)
1029 .kernel_size(3, 3)
1030 .groups(2)
1031 .group_input_channels(23)
1032 .group_output_channels(output_channels)
1033 .iterations(1)
1034 .TestQS8();
1035 }
1036 }
1037
1038 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_height_dilation) {
1039 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1040 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
1041 DeconvolutionOperatorTester()
1042 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1043 .padding(1)
1044 .kernel_size(3, 3)
1045 .dilation_height(dilation_height)
1046 .groups(2)
1047 .group_input_channels(23)
1048 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1049 .iterations(3)
1050 .TestQS8();
1051 }
1052 }
1053
1054 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_width_dilation) {
1055 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1056 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
1057 DeconvolutionOperatorTester()
1058 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1059 .padding(1)
1060 .kernel_size(3, 3)
1061 .dilation_width(dilation_width)
1062 .groups(2)
1063 .group_input_channels(23)
1064 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1065 .iterations(3)
1066 .TestQS8();
1067 }
1068 }
1069
1070 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_height_dilation_and_stride) {
1071 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1072 DeconvolutionOperatorTester()
1073 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1074 .padding(1)
1075 .kernel_size(3, 3)
1076 .dilation_height(3)
1077 .stride_height(2)
1078 .groups(2)
1079 .group_input_channels(23)
1080 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1081 .iterations(3)
1082 .TestQS8();
1083 }
1084
1085 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_width_dilation_and_stride) {
1086 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1087 DeconvolutionOperatorTester()
1088 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1089 .padding(1)
1090 .kernel_size(3, 3)
1091 .dilation_width(3)
1092 .stride_width(2)
1093 .groups(2)
1094 .group_input_channels(23)
1095 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1096 .iterations(3)
1097 .TestQS8();
1098 }
1099
1100 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_input_stride) {
1101 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1102 DeconvolutionOperatorTester()
1103 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1104 .padding(1)
1105 .kernel_size(3, 3)
1106 .groups(2)
1107 .group_input_channels(23)
1108 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1109 .input_pixel_stride(47)
1110 .iterations(3)
1111 .TestQS8();
1112 }
1113
1114 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_output_stride) {
1115 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1116 DeconvolutionOperatorTester()
1117 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1118 .padding(1)
1119 .kernel_size(3, 3)
1120 .groups(2)
1121 .group_input_channels(23)
1122 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
1123 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
1124 .iterations(3)
1125 .TestQS8();
1126 }
1127
1128 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_qmin) {
1129 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1130 DeconvolutionOperatorTester()
1131 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1132 .padding(1)
1133 .kernel_size(3, 3)
1134 .groups(2)
1135 .group_input_channels(23)
1136 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1137 .qmin(128)
1138 .iterations(3)
1139 .TestQS8();
1140 }
1141
1142 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_qmax) {
1143 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1144 DeconvolutionOperatorTester()
1145 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1146 .padding(1)
1147 .kernel_size(3, 3)
1148 .groups(2)
1149 .group_input_channels(23)
1150 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1151 .qmax(128)
1152 .iterations(3)
1153 .TestQS8();
1154 }
1155
1156 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_without_bias) {
1157 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1158 DeconvolutionOperatorTester()
1159 .has_bias(false)
1160 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1161 .padding(1)
1162 .kernel_size(3, 3)
1163 .groups(2)
1164 .group_input_channels(23)
1165 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1166 .iterations(3)
1167 .TestQS8();
1168 }
1169
1170 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_grouped_3x3) {
1171 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1172 DeconvolutionOperatorTester()
1173 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1174 .padding(1)
1175 .kernel_size(3, 3)
1176 .groups(2)
1177 .group_input_channels(15)
1178 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1179 .use_weights_cache(true)
1180 .iterations(3)
1181 .TestQS8();
1182 }
1183
1184 /**************************** CONV path, batched ****************************/
1185
1186 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3) {
1187 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1188 DeconvolutionOperatorTester()
1189 .batch_size(2)
1190 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1191 .padding(1)
1192 .kernel_size(3, 3)
1193 .group_input_channels(15)
1194 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1195 .iterations(3)
1196 .TestQS8();
1197 }
1198
1199 TEST(DECONVOLUTION_NHWC_QS8, batched_Kx3) {
1200 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1201 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
1202 DeconvolutionOperatorTester()
1203 .batch_size(2)
1204 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1205 .padding_width(1)
1206 .kernel_size(kernel_height, 3)
1207 .group_input_channels(17)
1208 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1209 .iterations(3)
1210 .TestQS8();
1211 }
1212 }
1213
1214 TEST(DECONVOLUTION_NHWC_QS8, batched_3xK) {
1215 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1216 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
1217 DeconvolutionOperatorTester()
1218 .batch_size(2)
1219 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1220 .padding_height(1)
1221 .kernel_size(3, kernel_width)
1222 .group_input_channels(17)
1223 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1224 .iterations(3)
1225 .TestQS8();
1226 }
1227 }
1228
1229 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_height_padding) {
1230 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1231 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
1232 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
1233 DeconvolutionOperatorTester()
1234 .batch_size(2)
1235 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1236 .padding_width(1)
1237 .padding_top(padding_top)
1238 .padding_bottom(padding_bottom)
1239 .kernel_size(3, 3)
1240 .group_input_channels(15)
1241 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1242 .iterations(1)
1243 .TestQS8();
1244 }
1245 }
1246 }
1247
1248 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_width_padding) {
1249 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1250 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
1251 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
1252 DeconvolutionOperatorTester()
1253 .batch_size(2)
1254 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1255 .padding_height(1)
1256 .padding_left(padding_left)
1257 .padding_right(padding_right)
1258 .kernel_size(3, 3)
1259 .group_input_channels(15)
1260 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1261 .iterations(1)
1262 .TestQS8();
1263 }
1264 }
1265 }
1266
1267 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_height_adjustment) {
1268 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1269 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
1270 DeconvolutionOperatorTester()
1271 .batch_size(2)
1272 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1273 .padding(1)
1274 .stride_height(adjustment_height + 1)
1275 .adjustment_height(adjustment_height)
1276 .kernel_size(3, 3)
1277 .group_input_channels(15)
1278 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1279 .iterations(1)
1280 .TestQS8();
1281 }
1282 }
1283
1284 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_width_adjustment) {
1285 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1286 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
1287 DeconvolutionOperatorTester()
1288 .batch_size(2)
1289 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1290 .padding(1)
1291 .stride_width(adjustment_width + 1)
1292 .adjustment_width(adjustment_width)
1293 .kernel_size(3, 3)
1294 .group_input_channels(15)
1295 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1296 .iterations(1)
1297 .TestQS8();
1298 }
1299 }
1300
1301 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_input_height) {
1302 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1303 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
1304 DeconvolutionOperatorTester()
1305 .batch_size(2)
1306 .input_size(input_height, kUnstridedInputWidth)
1307 .padding(1)
1308 .kernel_size(3, 3)
1309 .group_input_channels(15)
1310 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1311 .iterations(1)
1312 .TestQS8();
1313 }
1314 }
1315
1316 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_input_width) {
1317 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1318 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
1319 DeconvolutionOperatorTester()
1320 .batch_size(2)
1321 .input_size(kUnstridedInputHeight, input_width)
1322 .padding(1)
1323 .kernel_size(3, 3)
1324 .group_input_channels(15)
1325 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1326 .iterations(1)
1327 .TestQS8();
1328 }
1329 }
1330
1331 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_input_channels) {
1332 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1333 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
1334 DeconvolutionOperatorTester()
1335 .batch_size(2)
1336 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1337 .padding(1)
1338 .kernel_size(3, 3)
1339 .group_input_channels(input_channels)
1340 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1341 .iterations(1)
1342 .TestQS8();
1343 }
1344 }
1345
1346 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_output_channels) {
1347 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1348 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
1349 DeconvolutionOperatorTester()
1350 .batch_size(2)
1351 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1352 .padding(1)
1353 .kernel_size(3, 3)
1354 .group_input_channels(23)
1355 .group_output_channels(output_channels)
1356 .iterations(1)
1357 .TestQS8();
1358 }
1359 }
1360
1361 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_height_dilation) {
1362 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1363 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
1364 DeconvolutionOperatorTester()
1365 .batch_size(2)
1366 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1367 .padding(1)
1368 .kernel_size(3, 3)
1369 .dilation_height(dilation_height)
1370 .group_input_channels(23)
1371 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1372 .iterations(3)
1373 .TestQS8();
1374 }
1375 }
1376
1377 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_width_dilation) {
1378 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1379 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
1380 DeconvolutionOperatorTester()
1381 .batch_size(2)
1382 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1383 .padding(1)
1384 .kernel_size(3, 3)
1385 .dilation_width(dilation_width)
1386 .group_input_channels(23)
1387 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1388 .iterations(3)
1389 .TestQS8();
1390 }
1391 }
1392
1393 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_height_dilation_and_stride) {
1394 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1395 DeconvolutionOperatorTester()
1396 .batch_size(2)
1397 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1398 .padding(1)
1399 .kernel_size(3, 3)
1400 .dilation_height(3)
1401 .stride_height(2)
1402 .group_input_channels(23)
1403 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1404 .iterations(3)
1405 .TestQS8();
1406 }
1407
1408 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_width_dilation_and_stride) {
1409 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1410 DeconvolutionOperatorTester()
1411 .batch_size(2)
1412 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1413 .padding(1)
1414 .kernel_size(3, 3)
1415 .dilation_width(3)
1416 .stride_width(2)
1417 .group_input_channels(23)
1418 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1419 .iterations(3)
1420 .TestQS8();
1421 }
1422
1423 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_input_stride) {
1424 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1425 DeconvolutionOperatorTester()
1426 .batch_size(2)
1427 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1428 .padding(1)
1429 .kernel_size(3, 3)
1430 .group_input_channels(23)
1431 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1432 .input_pixel_stride(28)
1433 .iterations(3)
1434 .TestQS8();
1435 }
1436
1437 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_output_stride) {
1438 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1439 DeconvolutionOperatorTester()
1440 .batch_size(2)
1441 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1442 .padding(1)
1443 .kernel_size(3, 3)
1444 .group_input_channels(23)
1445 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1446 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
1447 .iterations(3)
1448 .TestQS8();
1449 }
1450
1451 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_qmin) {
1452 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1453 DeconvolutionOperatorTester()
1454 .batch_size(2)
1455 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1456 .padding(1)
1457 .kernel_size(3, 3)
1458 .group_input_channels(23)
1459 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1460 .qmin(128)
1461 .iterations(3)
1462 .TestQS8();
1463 }
1464
1465 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_qmax) {
1466 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1467 DeconvolutionOperatorTester()
1468 .batch_size(2)
1469 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1470 .padding(1)
1471 .kernel_size(3, 3)
1472 .group_input_channels(23)
1473 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1474 .qmax(128)
1475 .iterations(3)
1476 .TestQS8();
1477 }
1478
1479 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_without_bias) {
1480 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1481 DeconvolutionOperatorTester()
1482 .has_bias(false)
1483 .batch_size(2)
1484 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1485 .padding(1)
1486 .kernel_size(3, 3)
1487 .group_input_channels(23)
1488 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1489 .iterations(3)
1490 .TestQS8();
1491 }
1492
1493 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_3x3) {
1494 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1495 DeconvolutionOperatorTester()
1496 .batch_size(2)
1497 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1498 .padding(1)
1499 .kernel_size(3, 3)
1500 .group_input_channels(15)
1501 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1502 .use_weights_cache(true)
1503 .iterations(3)
1504 .TestQS8();
1505 }
1506
1507 /**************************** CONV path, grouped, batched ****************************/
1508
1509 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3) {
1510 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1511 DeconvolutionOperatorTester()
1512 .batch_size(2)
1513 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1514 .padding(1)
1515 .kernel_size(3, 3)
1516 .groups(2)
1517 .group_input_channels(15)
1518 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1519 .iterations(3)
1520 .TestQS8();
1521 }
1522
1523 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_Kx3) {
1524 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1525 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
1526 DeconvolutionOperatorTester()
1527 .batch_size(2)
1528 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1529 .padding_width(1)
1530 .kernel_size(kernel_height, 3)
1531 .groups(2)
1532 .group_input_channels(17)
1533 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1534 .iterations(3)
1535 .TestQS8();
1536 }
1537 }
1538
1539 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3xK) {
1540 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1541 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
1542 DeconvolutionOperatorTester()
1543 .batch_size(2)
1544 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1545 .padding_height(1)
1546 .kernel_size(3, kernel_width)
1547 .groups(2)
1548 .group_input_channels(17)
1549 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1550 .iterations(3)
1551 .TestQS8();
1552 }
1553 }
1554
1555 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_height_padding) {
1556 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1557 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
1558 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
1559 DeconvolutionOperatorTester()
1560 .batch_size(2)
1561 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1562 .padding_width(1)
1563 .padding_top(padding_top)
1564 .padding_bottom(padding_bottom)
1565 .kernel_size(3, 3)
1566 .groups(2)
1567 .group_input_channels(15)
1568 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1569 .iterations(1)
1570 .TestQS8();
1571 }
1572 }
1573 }
1574
1575 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_width_padding) {
1576 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1577 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
1578 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
1579 DeconvolutionOperatorTester()
1580 .batch_size(2)
1581 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1582 .padding_height(1)
1583 .padding_left(padding_left)
1584 .padding_right(padding_right)
1585 .kernel_size(3, 3)
1586 .groups(2)
1587 .group_input_channels(15)
1588 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1589 .iterations(1)
1590 .TestQS8();
1591 }
1592 }
1593 }
1594
1595 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_height_adjustment) {
1596 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1597 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
1598 DeconvolutionOperatorTester()
1599 .batch_size(2)
1600 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1601 .padding(1)
1602 .stride_height(adjustment_height + 1)
1603 .adjustment_height(adjustment_height)
1604 .kernel_size(3, 3)
1605 .groups(2)
1606 .group_input_channels(15)
1607 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1608 .iterations(1)
1609 .TestQS8();
1610 }
1611 }
1612
1613 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_width_adjustment) {
1614 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1615 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
1616 DeconvolutionOperatorTester()
1617 .batch_size(2)
1618 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1619 .padding(1)
1620 .stride_width(adjustment_width + 1)
1621 .adjustment_width(adjustment_width)
1622 .kernel_size(3, 3)
1623 .groups(2)
1624 .group_input_channels(15)
1625 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1626 .iterations(1)
1627 .TestQS8();
1628 }
1629 }
1630
1631 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_input_height) {
1632 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1633 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
1634 DeconvolutionOperatorTester()
1635 .batch_size(2)
1636 .input_size(input_height, kUnstridedInputWidth)
1637 .padding(1)
1638 .kernel_size(3, 3)
1639 .groups(2)
1640 .group_input_channels(15)
1641 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1642 .iterations(1)
1643 .TestQS8();
1644 }
1645 }
1646
1647 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_input_width) {
1648 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1649 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
1650 DeconvolutionOperatorTester()
1651 .batch_size(2)
1652 .input_size(kUnstridedInputHeight, input_width)
1653 .padding(1)
1654 .kernel_size(3, 3)
1655 .groups(2)
1656 .group_input_channels(15)
1657 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1658 .iterations(1)
1659 .TestQS8();
1660 }
1661 }
1662
1663 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_input_channels) {
1664 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1665 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
1666 DeconvolutionOperatorTester()
1667 .batch_size(2)
1668 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1669 .padding(1)
1670 .kernel_size(3, 3)
1671 .groups(2)
1672 .group_input_channels(input_channels)
1673 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1674 .iterations(1)
1675 .TestQS8();
1676 }
1677 }
1678
1679 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_output_channels) {
1680 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1681 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
1682 DeconvolutionOperatorTester()
1683 .batch_size(2)
1684 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1685 .padding(1)
1686 .kernel_size(3, 3)
1687 .groups(2)
1688 .group_input_channels(23)
1689 .group_output_channels(output_channels)
1690 .iterations(1)
1691 .TestQS8();
1692 }
1693 }
1694
1695 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_height_dilation) {
1696 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1697 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
1698 DeconvolutionOperatorTester()
1699 .batch_size(2)
1700 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1701 .padding(1)
1702 .kernel_size(3, 3)
1703 .dilation_height(dilation_height)
1704 .groups(2)
1705 .group_input_channels(23)
1706 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1707 .iterations(3)
1708 .TestQS8();
1709 }
1710 }
1711
1712 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_width_dilation) {
1713 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1714 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
1715 DeconvolutionOperatorTester()
1716 .batch_size(2)
1717 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1718 .padding(1)
1719 .kernel_size(3, 3)
1720 .dilation_width(dilation_width)
1721 .groups(2)
1722 .group_input_channels(23)
1723 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1724 .iterations(3)
1725 .TestQS8();
1726 }
1727 }
1728
1729 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_height_dilation_and_stride) {
1730 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1731 DeconvolutionOperatorTester()
1732 .batch_size(2)
1733 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1734 .padding(1)
1735 .kernel_size(3, 3)
1736 .dilation_height(3)
1737 .stride_width(2)
1738 .groups(2)
1739 .group_input_channels(23)
1740 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1741 .iterations(3)
1742 .TestQS8();
1743 }
1744
1745 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_width_dilation_and_stride) {
1746 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1747 DeconvolutionOperatorTester()
1748 .batch_size(2)
1749 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1750 .padding(1)
1751 .kernel_size(3, 3)
1752 .dilation_width(3)
1753 .stride_width(2)
1754 .groups(2)
1755 .group_input_channels(23)
1756 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1757 .iterations(3)
1758 .TestQS8();
1759 }
1760
1761 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_input_stride) {
1762 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1763 DeconvolutionOperatorTester()
1764 .batch_size(2)
1765 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1766 .padding(1)
1767 .kernel_size(3, 3)
1768 .groups(2)
1769 .group_input_channels(23)
1770 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1771 .input_pixel_stride(47)
1772 .iterations(3)
1773 .TestQS8();
1774 }
1775
1776 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_output_stride) {
1777 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1778 DeconvolutionOperatorTester()
1779 .batch_size(2)
1780 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1781 .padding(1)
1782 .kernel_size(3, 3)
1783 .groups(2)
1784 .group_input_channels(23)
1785 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
1786 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
1787 .iterations(3)
1788 .TestQS8();
1789 }
1790
1791 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_qmin) {
1792 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1793 DeconvolutionOperatorTester()
1794 .batch_size(2)
1795 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1796 .padding(1)
1797 .kernel_size(3, 3)
1798 .groups(2)
1799 .group_input_channels(23)
1800 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1801 .qmin(128)
1802 .iterations(3)
1803 .TestQS8();
1804 }
1805
1806 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_qmax) {
1807 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1808 DeconvolutionOperatorTester()
1809 .batch_size(2)
1810 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1811 .padding(1)
1812 .kernel_size(3, 3)
1813 .groups(2)
1814 .group_input_channels(23)
1815 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1816 .qmax(128)
1817 .iterations(3)
1818 .TestQS8();
1819 }
1820
1821 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_without_bias) {
1822 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1823 DeconvolutionOperatorTester()
1824 .has_bias(false)
1825 .batch_size(2)
1826 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1827 .padding(1)
1828 .kernel_size(3, 3)
1829 .groups(2)
1830 .group_input_channels(23)
1831 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1832 .iterations(3)
1833 .TestQS8();
1834 }
1835
1836 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_grouped_3x3) {
1837 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1838 DeconvolutionOperatorTester()
1839 .batch_size(2)
1840 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1841 .padding(1)
1842 .kernel_size(3, 3)
1843 .groups(2)
1844 .group_input_channels(15)
1845 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1846 .use_weights_cache(true)
1847 .iterations(3)
1848 .TestQS8();
1849 }
1850
1851 /**************************** CONV path, setup ****************************/
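// The setup tests below create a single operator, run it, then change the problem
// size via next_batch_size() / next_input_height() / next_input_width() and run it
// again on the same operator; TestSetupQS8() presumably checks both runs against
// the reference results.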
1852
1853 TEST(DECONVOLUTION_NHWC_QS8, 3x3_setup_changing_batch) {
1854 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1855 DeconvolutionOperatorTester()
1856 .batch_size(2)
1857 .next_batch_size(5)
1858 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1859 .kernel_height(3)
1860 .kernel_width(5)
1861 .groups(2)
1862 .group_input_channels(15)
1863 .group_output_channels(17)
1864 .TestSetupQS8();
1865 }
1866
1867 TEST(DECONVOLUTION_NHWC_QS8, 3x3_setup_changing_height) {
1868 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1869 DeconvolutionOperatorTester()
1870 .batch_size(2)
1871 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1872 .next_input_height(kUnstridedInputHeight + 3)
1873 .kernel_height(3)
1874 .kernel_width(5)
1875 .groups(2)
1876 .group_input_channels(15)
1877 .group_output_channels(17)
1878 .TestSetupQS8();
1879 }
1880
1881 TEST(DECONVOLUTION_NHWC_QS8, 3x3_setup_changing_width) {
1882 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1883 DeconvolutionOperatorTester()
1884 .batch_size(2)
1885 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1886 .next_input_width(kUnstridedInputWidth + 3)
1887 .kernel_height(3)
1888 .kernel_width(5)
1889 .groups(2)
1890 .group_input_channels(15)
1891 .group_output_channels(17)
1892 .TestSetupQS8();
1893 }
1894
1895 /**************************** SUBCONV2D/IGEMM path ****************************/
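// Note: with stride > 1 the operator presumably takes XNNPACK's SUBCONV2D path,
// which splits the transposed convolution into stride_height x stride_width
// sub-convolutions evaluated with IGEMM micro-kernels; the tests below use the
// kStridedInputHeight x kStridedInputWidth input sizes.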
1896
1897 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2) {
1898 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1899 DeconvolutionOperatorTester()
1900 .input_size(kStridedInputHeight, kStridedInputWidth)
1901 .padding(1)
1902 .kernel_size(3, 3)
1903 .stride(2)
1904 .group_input_channels(15)
1905 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1906 .iterations(3)
1907 .TestQS8();
1908 }
1909
1910 TEST(DECONVOLUTION_NHWC_QS8, Kx3s2) {
1911 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1912 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
1913 DeconvolutionOperatorTester()
1914 .input_size(kStridedInputHeight, kStridedInputWidth)
1915 .padding_width(1)
1916 .kernel_size(kernel_height, 3)
1917 .stride(2)
1918 .group_input_channels(17)
1919 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1920 .iterations(3)
1921 .TestQS8();
1922 }
1923 }
1924
1925 TEST(DECONVOLUTION_NHWC_QS8, 3xKs2) {
1926 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1927 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
1928 DeconvolutionOperatorTester()
1929 .input_size(kStridedInputHeight, kStridedInputWidth)
1930 .padding_height(1)
1931 .kernel_size(3, kernel_width)
1932 .stride(2)
1933 .group_input_channels(17)
1934 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1935 .iterations(3)
1936 .TestQS8();
1937 }
1938 }
1939
1940 TEST(DECONVOLUTION_NHWC_QS8, 3x3sSx1) {
1941 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1942 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
1943 DeconvolutionOperatorTester()
1944 .input_size(kStridedInputHeight, kStridedInputWidth)
1945 .padding(1)
1946 .padding_width(1)
1947 .kernel_size(3, 3)
1948 .stride_height(stride_height)
1949 .group_input_channels(17)
1950 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1951 .iterations(3)
1952 .TestQS8();
1953 }
1954 }
1955
1956 TEST(DECONVOLUTION_NHWC_QS8, 3x3s1xS) {
1957 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1958 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
1959 DeconvolutionOperatorTester()
1960 .input_size(kStridedInputHeight, kStridedInputWidth)
1961 .padding(1)
1962 .padding_width(1)
1963 .kernel_size(3, 3)
1964 .stride_width(stride_width)
1965 .group_input_channels(17)
1966 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1967 .iterations(3)
1968 .TestQS8();
1969 }
1970 }
1971
1972 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_height_padding) {
1973 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1974 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
1975 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
1976 DeconvolutionOperatorTester()
1977 .input_size(kStridedInputHeight, kStridedInputWidth)
1978 .padding_width(1)
1979 .padding_top(padding_top)
1980 .padding_bottom(padding_bottom)
1981 .kernel_size(3, 3)
1982 .stride(2)
1983 .group_input_channels(15)
1984 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1985 .iterations(1)
1986 .TestQS8();
1987 }
1988 }
1989 }
1990
1991 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_width_padding) {
1992 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1993 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
1994 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
1995 DeconvolutionOperatorTester()
1996 .input_size(kStridedInputHeight, kStridedInputWidth)
1997 .padding_height(1)
1998 .padding_left(padding_left)
1999 .padding_right(padding_right)
2000 .kernel_size(3, 3)
2001 .stride(2)
2002 .group_input_channels(15)
2003 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2004 .iterations(1)
2005 .TestQS8();
2006 }
2007 }
2008 }
2009
2010 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_height_adjustment) {
2011 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2012 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2013 DeconvolutionOperatorTester()
2014 .input_size(kStridedInputHeight, kStridedInputWidth)
2015 .padding(1)
2016 .adjustment_height(adjustment_height)
2017 .kernel_size(3, 3)
2018 .stride(2)
2019 .group_input_channels(15)
2020 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2021 .iterations(1)
2022 .TestQS8();
2023 }
2024 }
2025
2026 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_width_adjustment) {
2027 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2028 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2029 DeconvolutionOperatorTester()
2030 .input_size(kStridedInputHeight, kStridedInputWidth)
2031 .padding(1)
2032 .adjustment_width(adjustment_width)
2033 .kernel_size(3, 3)
2034 .stride(2)
2035 .group_input_channels(15)
2036 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2037 .iterations(1)
2038 .TestQS8();
2039 }
2040 }
2041
2042 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_input_height) {
2043 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2044 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2045 DeconvolutionOperatorTester()
2046 .input_size(input_height, kStridedInputWidth)
2047 .padding(1)
2048 .kernel_size(3, 3)
2049 .stride(2)
2050 .group_input_channels(15)
2051 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2052 .iterations(1)
2053 .TestQS8();
2054 }
2055 }
2056
2057 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_input_width) {
2058 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2059 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2060 DeconvolutionOperatorTester()
2061       .input_size(kStridedInputHeight, input_width)
2062 .padding(1)
2063 .kernel_size(3, 3)
2064 .stride(2)
2065 .group_input_channels(15)
2066 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2067 .iterations(1)
2068 .TestQS8();
2069 }
2070 }
2071
2072 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_input_channels) {
2073 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2074 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
2075 DeconvolutionOperatorTester()
2076 .input_size(kStridedInputHeight, kStridedInputWidth)
2077 .padding(1)
2078 .kernel_size(3, 3)
2079 .stride(2)
2080 .group_input_channels(input_channels)
2081 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2082 .iterations(1)
2083 .TestQS8();
2084 }
2085 }
2086
2087 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_output_channels) {
2088 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2089 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
2090 DeconvolutionOperatorTester()
2091 .input_size(kStridedInputHeight, kStridedInputWidth)
2092 .padding(1)
2093 .kernel_size(3, 3)
2094 .stride(2)
2095 .group_input_channels(23)
2096 .group_output_channels(output_channels)
2097 .iterations(1)
2098 .TestQS8();
2099 }
2100 }
2101
2102 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_input_stride) {
2103 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2104 DeconvolutionOperatorTester()
2105 .input_size(kStridedInputHeight, kStridedInputWidth)
2106 .padding(1)
2107 .kernel_size(3, 3)
2108 .stride(2)
2109 .group_input_channels(23)
2110 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2111 .input_pixel_stride(28)
2112 .iterations(3)
2113 .TestQS8();
2114 }
2115
2116 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_output_stride) {
2117 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2118 DeconvolutionOperatorTester()
2119 .input_size(kStridedInputHeight, kStridedInputWidth)
2120 .padding(1)
2121 .kernel_size(3, 3)
2122 .stride(2)
2123 .group_input_channels(23)
2124 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2125 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
2126 .iterations(3)
2127 .TestQS8();
2128 }
2129
2130 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_qmin) {
2131 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2132 DeconvolutionOperatorTester()
2133 .input_size(kStridedInputHeight, kStridedInputWidth)
2134 .padding(1)
2135 .kernel_size(3, 3)
2136 .stride(2)
2137 .group_input_channels(23)
2138 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2139 .qmin(128)
2140 .iterations(3)
2141 .TestQS8();
2142 }
2143
2144 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_qmax) {
2145 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2146 DeconvolutionOperatorTester()
2147 .input_size(kStridedInputHeight, kStridedInputWidth)
2148 .padding(1)
2149 .kernel_size(3, 3)
2150 .stride(2)
2151 .group_input_channels(23)
2152 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2153 .qmax(128)
2154 .iterations(3)
2155 .TestQS8();
2156 }
2157
2158 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_without_bias) {
2159 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2160 DeconvolutionOperatorTester()
2161 .has_bias(false)
2162 .input_size(kStridedInputHeight, kStridedInputWidth)
2163 .padding(1)
2164 .kernel_size(3, 3)
2165 .stride(2)
2166 .group_input_channels(23)
2167 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2168 .iterations(3)
2169 .TestQS8();
2170 }
2171
2172 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_3x3s2) {
2173 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2174 DeconvolutionOperatorTester()
2175 .input_size(kStridedInputHeight, kStridedInputWidth)
2176 .padding(1)
2177 .kernel_size(3, 3)
2178 .stride(2)
2179 .group_input_channels(15)
2180 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2181 .use_weights_cache(true)
2182 .iterations(3)
2183 .TestQS8();
2184 }
2185
2186 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
2187
2188 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2) {
2189 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2190 DeconvolutionOperatorTester()
2191 .input_size(kStridedInputHeight, kStridedInputWidth)
2192 .padding(1)
2193 .kernel_size(3, 3)
2194 .stride(2)
2195 .groups(2)
2196 .group_input_channels(17)
2197 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2198 .iterations(3)
2199 .TestQS8();
2200 }
2201
2202 TEST(DECONVOLUTION_NHWC_QS8, grouped_Kx3s2) {
2203 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2204 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
2205 DeconvolutionOperatorTester()
2206 .input_size(kStridedInputHeight, kStridedInputWidth)
2207 .padding_width(1)
2208 .kernel_size(kernel_height, 3)
2209 .stride(2)
2210 .groups(2)
2211 .group_input_channels(17)
2212 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2213 .iterations(3)
2214 .TestQS8();
2215 }
2216 }
2217
2218 TEST(DECONVOLUTION_NHWC_QS8, grouped_3xKs2) {
2219 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2220 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
2221 DeconvolutionOperatorTester()
2222 .input_size(kStridedInputHeight, kStridedInputWidth)
2223 .padding_height(1)
2224 .kernel_size(3, kernel_width)
2225 .stride(2)
2226 .groups(2)
2227 .group_input_channels(17)
2228 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2229 .iterations(3)
2230 .TestQS8();
2231 }
2232 }
2233
2234 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3sSx1) {
2235 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2236 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
2237 DeconvolutionOperatorTester()
2238 .input_size(kStridedInputHeight, kStridedInputWidth)
2239 .padding(1)
2240 .padding_width(1)
2241 .kernel_size(3, 3)
2242 .stride_height(stride_height)
2243 .groups(2)
2244 .group_input_channels(17)
2245 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2246 .iterations(3)
2247 .TestQS8();
2248 }
2249 }
2250
2251 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s1xS) {
2252 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2253 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
2254 DeconvolutionOperatorTester()
2255 .input_size(kStridedInputHeight, kStridedInputWidth)
2256 .padding(1)
2257 .padding_width(1)
2258 .kernel_size(3, 3)
2259 .stride_width(stride_width)
2260 .groups(2)
2261 .group_input_channels(17)
2262 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2263 .iterations(3)
2264 .TestQS8();
2265 }
2266 }
2267
2268 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_height_padding) {
2269 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2270 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
2271 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
2272 DeconvolutionOperatorTester()
2273 .input_size(kStridedInputHeight, kStridedInputWidth)
2274 .padding_width(1)
2275 .padding_top(padding_top)
2276 .padding_bottom(padding_bottom)
2277 .kernel_size(3, 3)
2278 .stride(2)
2279 .groups(2)
2280 .group_input_channels(17)
2281 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2282 .iterations(1)
2283 .TestQS8();
2284 }
2285 }
2286 }
2287
2288 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_width_padding) {
2289 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2290 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
2291 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
2292 DeconvolutionOperatorTester()
2293 .input_size(kStridedInputHeight, kStridedInputWidth)
2294 .padding_height(1)
2295 .padding_left(padding_left)
2296 .padding_right(padding_right)
2297 .kernel_size(3, 3)
2298 .stride(2)
2299 .groups(2)
2300 .group_input_channels(17)
2301 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2302 .iterations(1)
2303 .TestQS8();
2304 }
2305 }
2306 }
2307
2308 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_height_adjustment) {
2309 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2310 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2311 DeconvolutionOperatorTester()
2312 .input_size(kStridedInputHeight, kStridedInputWidth)
2313 .padding(1)
2314 .adjustment_height(adjustment_height)
2315 .kernel_size(3, 3)
2316 .stride(2)
2317 .groups(2)
2318 .group_input_channels(17)
2319 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2320 .iterations(1)
2321 .TestQS8();
2322 }
2323 }
2324
2325 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_width_adjustment) {
2326 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2327 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2328 DeconvolutionOperatorTester()
2329 .input_size(kStridedInputHeight, kStridedInputWidth)
2330 .padding(1)
2331 .adjustment_width(adjustment_width)
2332 .kernel_size(3, 3)
2333 .stride(2)
2334 .groups(2)
2335 .group_input_channels(17)
2336 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2337 .iterations(1)
2338 .TestQS8();
2339 }
2340 }
2341
2342 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_input_height) {
2343 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2344 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2345 DeconvolutionOperatorTester()
2346 .input_size(input_height, kStridedInputWidth)
2347 .padding(1)
2348 .kernel_size(3, 3)
2349 .stride(2)
2350 .groups(2)
2351 .group_input_channels(17)
2352 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2353 .iterations(1)
2354 .TestQS8();
2355 }
2356 }
2357
2358 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_input_width) {
2359 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2360 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2361 DeconvolutionOperatorTester()
2362       .input_size(kStridedInputHeight, input_width)
2363 .padding(1)
2364 .kernel_size(3, 3)
2365 .stride(2)
2366 .groups(2)
2367 .group_input_channels(17)
2368 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2369 .iterations(1)
2370 .TestQS8();
2371 }
2372 }
2373
2374 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_input_channels) {
2375 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2376 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
2377 DeconvolutionOperatorTester()
2378 .input_size(kStridedInputHeight, kStridedInputWidth)
2379 .padding(1)
2380 .kernel_size(3, 3)
2381 .stride(2)
2382 .groups(2)
2383 .group_input_channels(input_channels)
2384 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2385 .iterations(1)
2386 .TestQS8();
2387 }
2388 }
2389
2390 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_output_channels) {
2391 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2392 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
2393 DeconvolutionOperatorTester()
2394 .input_size(kStridedInputHeight, kStridedInputWidth)
2395 .padding(1)
2396 .kernel_size(3, 3)
2397 .stride(2)
2398 .groups(2)
2399 .group_input_channels(17)
2400 .group_output_channels(output_channels)
2401 .iterations(1)
2402 .TestQS8();
2403 }
2404 }
2405
2406 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_input_stride) {
2407 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2408 DeconvolutionOperatorTester()
2409 .input_size(kStridedInputHeight, kStridedInputWidth)
2410 .padding(1)
2411 .kernel_size(3, 3)
2412 .stride(2)
2413 .groups(2)
2414 .group_input_channels(17)
2415 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2416 .input_pixel_stride(37)
2417 .iterations(3)
2418 .TestQS8();
2419 }
2420
2421 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_output_stride) {
2422 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2423 DeconvolutionOperatorTester()
2424 .input_size(kStridedInputHeight, kStridedInputWidth)
2425 .padding(1)
2426 .kernel_size(3, 3)
2427 .stride(2)
2428 .groups(2)
2429 .group_input_channels(17)
2430 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
2431 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
2432 .iterations(3)
2433 .TestQS8();
2434 }
2435
2436 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_qmin) {
2437 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2438 DeconvolutionOperatorTester()
2439 .input_size(kStridedInputHeight, kStridedInputWidth)
2440 .padding(1)
2441 .kernel_size(3, 3)
2442 .stride(2)
2443 .groups(2)
2444 .group_input_channels(17)
2445 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2446 .qmin(128)
2447 .iterations(3)
2448 .TestQS8();
2449 }
2450
2451 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_qmax) {
2452 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2453 DeconvolutionOperatorTester()
2454 .input_size(kStridedInputHeight, kStridedInputWidth)
2455 .padding(1)
2456 .kernel_size(3, 3)
2457 .stride(2)
2458 .groups(2)
2459 .group_input_channels(17)
2460 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2461 .qmax(128)
2462 .iterations(3)
2463 .TestQS8();
2464 }
2465
2466 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_without_bias) {
2467 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2468 DeconvolutionOperatorTester()
2469 .has_bias(false)
2470 .input_size(kStridedInputHeight, kStridedInputWidth)
2471 .padding(1)
2472 .kernel_size(3, 3)
2473 .stride(2)
2474 .groups(2)
2475 .group_input_channels(17)
2476 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2477 .iterations(3)
2478 .TestQS8();
2479 }
2480
2481 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_grouped_3x3s2) {
2482 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2483 DeconvolutionOperatorTester()
2484 .input_size(kStridedInputHeight, kStridedInputWidth)
2485 .padding(1)
2486 .kernel_size(3, 3)
2487 .stride(2)
2488 .groups(2)
2489 .group_input_channels(17)
2490 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2491 .use_weights_cache(true)
2492 .iterations(3)
2493 .TestQS8();
2494 }
2495
2496 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
2497
2498 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2) {
2499 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2500 DeconvolutionOperatorTester()
2501 .batch_size(2)
2502 .input_size(kStridedInputHeight, kStridedInputWidth)
2503 .padding(1)
2504 .kernel_size(3, 3)
2505 .stride(2)
2506 .group_input_channels(15)
2507 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2508 .iterations(3)
2509 .TestQS8();
2510 }
2511
2512 TEST(DECONVOLUTION_NHWC_QS8, batched_Kx3s2) {
2513 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2514 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
2515 DeconvolutionOperatorTester()
2516 .batch_size(2)
2517 .input_size(kStridedInputHeight, kStridedInputWidth)
2518 .padding_width(1)
2519 .kernel_size(kernel_height, 3)
2520 .stride(2)
2521 .group_input_channels(17)
2522 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2523 .iterations(3)
2524 .TestQS8();
2525 }
2526 }
2527
2528 TEST(DECONVOLUTION_NHWC_QS8, batched_3xKs2) {
2529 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2530 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
2531 DeconvolutionOperatorTester()
2532 .batch_size(2)
2533 .input_size(kStridedInputHeight, kStridedInputWidth)
2534 .padding_height(1)
2535 .kernel_size(3, kernel_width)
2536 .stride(2)
2537 .group_input_channels(17)
2538 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2539 .iterations(3)
2540 .TestQS8();
2541 }
2542 }
2543
2544 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3sSx1) {
2545 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2546 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
2547 DeconvolutionOperatorTester()
2548 .batch_size(2)
2549 .input_size(kStridedInputHeight, kStridedInputWidth)
2550 .padding(1)
2551 .padding_width(1)
2552 .kernel_size(3, 3)
2553 .stride_height(stride_height)
2554 .group_input_channels(17)
2555 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2556 .iterations(3)
2557 .TestQS8();
2558 }
2559 }
2560
2561 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s1xS) {
2562 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2563 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
2564 DeconvolutionOperatorTester()
2565 .batch_size(2)
2566 .input_size(kStridedInputHeight, kStridedInputWidth)
2567 .padding(1)
2568 .padding_width(1)
2569 .kernel_size(3, 3)
2570 .stride_width(stride_width)
2571 .group_input_channels(17)
2572 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2573 .iterations(3)
2574 .TestQS8();
2575 }
2576 }
2577
2578 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_height_padding) {
2579 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2580 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
2581 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
2582 DeconvolutionOperatorTester()
2583 .batch_size(2)
2584 .input_size(kStridedInputHeight, kStridedInputWidth)
2585 .padding_width(1)
2586 .padding_top(padding_top)
2587 .padding_bottom(padding_bottom)
2588 .kernel_size(3, 3)
2589 .stride(2)
2590 .group_input_channels(15)
2591 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2592 .iterations(1)
2593 .TestQS8();
2594 }
2595 }
2596 }
2597
2598 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_width_padding) {
2599 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2600 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
2601 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
2602 DeconvolutionOperatorTester()
2603 .batch_size(2)
2604 .input_size(kStridedInputHeight, kStridedInputWidth)
2605 .padding_height(1)
2606 .padding_left(padding_left)
2607 .padding_right(padding_right)
2608 .kernel_size(3, 3)
2609 .stride(2)
2610 .group_input_channels(15)
2611 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2612 .iterations(1)
2613 .TestQS8();
2614 }
2615 }
2616 }
2617
2618 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_height_adjustment) {
2619 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2620 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2621 DeconvolutionOperatorTester()
2622 .batch_size(2)
2623 .input_size(kStridedInputHeight, kStridedInputWidth)
2624 .padding(1)
2625 .adjustment_height(adjustment_height)
2626 .kernel_size(3, 3)
2627 .stride(2)
2628 .group_input_channels(15)
2629 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2630 .iterations(1)
2631 .TestQS8();
2632 }
2633 }
2634
2635 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_width_adjustment) {
2636 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2637 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2638 DeconvolutionOperatorTester()
2639 .batch_size(2)
2640 .input_size(kStridedInputHeight, kStridedInputWidth)
2641 .padding(1)
2642 .adjustment_width(adjustment_width)
2643 .kernel_size(3, 3)
2644 .stride(2)
2645 .group_input_channels(15)
2646 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2647 .iterations(1)
2648 .TestQS8();
2649 }
2650 }
2651
2652 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_input_height) {
2653 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2654 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2655 DeconvolutionOperatorTester()
2656 .batch_size(2)
2657 .input_size(input_height, kStridedInputWidth)
2658 .padding(1)
2659 .kernel_size(3, 3)
2660 .stride(2)
2661 .group_input_channels(15)
2662 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2663 .iterations(1)
2664 .TestQS8();
2665 }
2666 }
2667
2668 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_input_width) {
2669 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2670 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2671 DeconvolutionOperatorTester()
2672 .batch_size(2)
2673       .input_size(kStridedInputHeight, input_width)
2674 .padding(1)
2675 .kernel_size(3, 3)
2676 .stride(2)
2677 .group_input_channels(15)
2678 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2679 .iterations(1)
2680 .TestQS8();
2681 }
2682 }
2683
2684 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_input_channels) {
2685 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2686 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
2687 DeconvolutionOperatorTester()
2688 .batch_size(2)
2689 .input_size(kStridedInputHeight, kStridedInputWidth)
2690 .padding(1)
2691 .kernel_size(3, 3)
2692 .stride(2)
2693 .group_input_channels(input_channels)
2694 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2695 .iterations(1)
2696 .TestQS8();
2697 }
2698 }
2699
2700 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_output_channels) {
2701 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2702 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
2703 DeconvolutionOperatorTester()
2704 .batch_size(2)
2705 .input_size(kStridedInputHeight, kStridedInputWidth)
2706 .padding(1)
2707 .kernel_size(3, 3)
2708 .stride(2)
2709 .group_input_channels(23)
2710 .group_output_channels(output_channels)
2711 .iterations(1)
2712 .TestQS8();
2713 }
2714 }
2715
2716 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_input_stride) {
2717 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2718 DeconvolutionOperatorTester()
2719 .batch_size(2)
2720 .input_size(kStridedInputHeight, kStridedInputWidth)
2721 .padding(1)
2722 .kernel_size(3, 3)
2723 .stride(2)
2724 .group_input_channels(23)
2725 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2726 .input_pixel_stride(28)
2727 .iterations(3)
2728 .TestQS8();
2729 }
2730
2731 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_output_stride) {
2732 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2733 DeconvolutionOperatorTester()
2734 .batch_size(2)
2735 .input_size(kStridedInputHeight, kStridedInputWidth)
2736 .padding(1)
2737 .kernel_size(3, 3)
2738 .stride(2)
2739 .group_input_channels(23)
2740 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2741 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
2742 .iterations(3)
2743 .TestQS8();
2744 }
2745
2746 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_qmin) {
2747 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2748 DeconvolutionOperatorTester()
2749 .batch_size(2)
2750 .input_size(kStridedInputHeight, kStridedInputWidth)
2751 .padding(1)
2752 .kernel_size(3, 3)
2753 .stride(2)
2754 .group_input_channels(23)
2755 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2756 .qmin(128)
2757 .iterations(3)
2758 .TestQS8();
2759 }
2760
2761 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_qmax) {
2762 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2763 DeconvolutionOperatorTester()
2764 .batch_size(2)
2765 .input_size(kStridedInputHeight, kStridedInputWidth)
2766 .padding(1)
2767 .kernel_size(3, 3)
2768 .stride(2)
2769 .group_input_channels(23)
2770 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2771 .qmax(128)
2772 .iterations(3)
2773 .TestQS8();
2774 }
2775
2776 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_without_bias) {
2777 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2778 DeconvolutionOperatorTester()
2779 .has_bias(false)
2780 .batch_size(2)
2781 .input_size(kStridedInputHeight, kStridedInputWidth)
2782 .padding(1)
2783 .kernel_size(3, 3)
2784 .stride(2)
2785 .group_input_channels(23)
2786 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2787 .iterations(3)
2788 .TestQS8();
2789 }
2790
2791 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_3x3s2) {
2792 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2793 DeconvolutionOperatorTester()
2794 .batch_size(2)
2795 .input_size(kStridedInputHeight, kStridedInputWidth)
2796 .padding(1)
2797 .kernel_size(3, 3)
2798 .stride(2)
2799 .group_input_channels(15)
2800 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2801 .use_weights_cache(true)
2802 .iterations(3)
2803 .TestQS8();
2804 }
2805
2806 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
2807
2808 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2) {
2809 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2810 DeconvolutionOperatorTester()
2811 .batch_size(2)
2812 .input_size(kStridedInputHeight, kStridedInputWidth)
2813 .padding(1)
2814 .kernel_size(3, 3)
2815 .stride(2)
2816 .groups(2)
2817 .group_input_channels(17)
2818 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2819 .iterations(3)
2820 .TestQS8();
2821 }
2822
2823 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_Kx3s2) {
2824 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2825 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
2826 DeconvolutionOperatorTester()
2827 .batch_size(2)
2828 .input_size(kStridedInputHeight, kStridedInputWidth)
2829 .padding_width(1)
2830 .kernel_size(kernel_height, 3)
2831 .stride(2)
2832 .groups(2)
2833 .group_input_channels(17)
2834 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2835 .iterations(3)
2836 .TestQS8();
2837 }
2838 }
2839
2840 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3xKs2) {
2841 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2842 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
2843 DeconvolutionOperatorTester()
2844 .batch_size(2)
2845 .input_size(kStridedInputHeight, kStridedInputWidth)
2846 .padding_height(1)
2847 .kernel_size(3, kernel_width)
2848 .stride(2)
2849 .groups(2)
2850 .group_input_channels(17)
2851 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2852 .iterations(3)
2853 .TestQS8();
2854 }
2855 }
2856
2857 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3sSx1) {
2858 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2859 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
2860 DeconvolutionOperatorTester()
2861 .batch_size(2)
2862 .input_size(kStridedInputHeight, kStridedInputWidth)
2863 .padding(1)
2864 .padding_width(1)
2865 .kernel_size(3, 3)
2866 .stride_height(stride_height)
2867 .groups(2)
2868 .group_input_channels(17)
2869 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2870 .iterations(3)
2871 .TestQS8();
2872 }
2873 }
2874
2875 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s1xS) {
2876 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2877 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
2878 DeconvolutionOperatorTester()
2879 .batch_size(2)
2880 .input_size(kStridedInputHeight, kStridedInputWidth)
2881 .padding(1)
2882 .padding_width(1)
2883 .kernel_size(3, 3)
2884 .stride_width(stride_width)
2885 .groups(2)
2886 .group_input_channels(17)
2887 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2888 .iterations(3)
2889 .TestQS8();
2890 }
2891 }
2892
2893 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_height_padding) {
2894 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2895 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
2896 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
2897 DeconvolutionOperatorTester()
2898 .batch_size(2)
2899 .input_size(kStridedInputHeight, kStridedInputWidth)
2900 .padding_width(1)
2901 .padding_top(padding_top)
2902 .padding_bottom(padding_bottom)
2903 .kernel_size(3, 3)
2904 .stride(2)
2905 .groups(2)
2906 .group_input_channels(17)
2907 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2908 .iterations(1)
2909 .TestQS8();
2910 }
2911 }
2912 }
2913
2914 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_width_padding) {
2915 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2916 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
2917 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
2918 DeconvolutionOperatorTester()
2919 .batch_size(2)
2920 .input_size(kStridedInputHeight, kStridedInputWidth)
2921 .padding_height(1)
2922 .padding_left(padding_left)
2923 .padding_right(padding_right)
2924 .kernel_size(3, 3)
2925 .stride(2)
2926 .groups(2)
2927 .group_input_channels(17)
2928 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2929 .iterations(1)
2930 .TestQS8();
2931 }
2932 }
2933 }
2934
2935 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_height_adjustment) {
2936 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2937 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2938 DeconvolutionOperatorTester()
2939 .batch_size(2)
2940 .input_size(kStridedInputHeight, kStridedInputWidth)
2941 .padding(1)
2942 .adjustment_height(adjustment_height)
2943 .kernel_size(3, 3)
2944 .stride(2)
2945 .groups(2)
2946 .group_input_channels(17)
2947 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2948 .iterations(1)
2949 .TestQS8();
2950 }
2951 }
2952
2953 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_width_adjustment) {
2954 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2955 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2956 DeconvolutionOperatorTester()
2957 .batch_size(2)
2958 .input_size(kStridedInputHeight, kStridedInputWidth)
2959 .padding(1)
2960 .adjustment_width(adjustment_width)
2961 .kernel_size(3, 3)
2962 .stride(2)
2963 .groups(2)
2964 .group_input_channels(17)
2965 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2966 .iterations(1)
2967 .TestQS8();
2968 }
2969 }
2970
2971 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_input_height) {
2972 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2973 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2974 DeconvolutionOperatorTester()
2975 .batch_size(2)
2976 .input_size(input_height, kStridedInputWidth)
2977 .padding(1)
2978 .kernel_size(3, 3)
2979 .stride(2)
2980 .groups(2)
2981 .group_input_channels(17)
2982 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2983 .iterations(1)
2984 .TestQS8();
2985 }
2986 }
2987
2988 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_input_width) {
2989 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2990 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2991 DeconvolutionOperatorTester()
2992 .batch_size(2)
2993       .input_size(kStridedInputHeight, input_width)
2994 .padding(1)
2995 .kernel_size(3, 3)
2996 .stride(2)
2997 .groups(2)
2998 .group_input_channels(17)
2999 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3000 .iterations(1)
3001 .TestQS8();
3002 }
3003 }
3004
3005 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_input_channels) {
3006 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3007 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
3008 DeconvolutionOperatorTester()
3009 .batch_size(2)
3010 .input_size(kStridedInputHeight, kStridedInputWidth)
3011 .padding(1)
3012 .kernel_size(3, 3)
3013 .stride(2)
3014 .groups(2)
3015 .group_input_channels(input_channels)
3016 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3017 .iterations(1)
3018 .TestQS8();
3019 }
3020 }
3021
3022 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_output_channels) {
3023 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3024 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3025 DeconvolutionOperatorTester()
3026 .batch_size(2)
3027 .input_size(kStridedInputHeight, kStridedInputWidth)
3028 .padding(1)
3029 .kernel_size(3, 3)
3030 .stride(2)
3031 .groups(2)
3032 .group_input_channels(17)
3033 .group_output_channels(output_channels)
3034 .iterations(1)
3035 .TestQS8();
3036 }
3037 }
3038
3039 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_input_stride) {
3040 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3041 DeconvolutionOperatorTester()
3042 .batch_size(2)
3043 .input_size(kStridedInputHeight, kStridedInputWidth)
3044 .padding(1)
3045 .kernel_size(3, 3)
3046 .stride(2)
3047 .groups(2)
3048 .group_input_channels(17)
3049 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3050 .input_pixel_stride(37)
3051 .iterations(3)
3052 .TestQS8();
3053 }
3054
3055 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_output_stride) {
3056 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3057 DeconvolutionOperatorTester()
3058 .batch_size(2)
3059 .input_size(kStridedInputHeight, kStridedInputWidth)
3060 .padding(1)
3061 .kernel_size(3, 3)
3062 .stride(2)
3063 .groups(2)
3064 .group_input_channels(17)
3065 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
3066 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3067 .iterations(3)
3068 .TestQS8();
3069 }
3070
3071 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_qmin) {
3072 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3073 DeconvolutionOperatorTester()
3074 .batch_size(2)
3075 .input_size(kStridedInputHeight, kStridedInputWidth)
3076 .padding(1)
3077 .kernel_size(3, 3)
3078 .stride(2)
3079 .groups(2)
3080 .group_input_channels(17)
3081 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3082 .qmin(128)
3083 .iterations(3)
3084 .TestQS8();
3085 }
3086
3087 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_qmax) {
3088 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3089 DeconvolutionOperatorTester()
3090 .batch_size(2)
3091 .input_size(kStridedInputHeight, kStridedInputWidth)
3092 .padding(1)
3093 .kernel_size(3, 3)
3094 .stride(2)
3095 .groups(2)
3096 .group_input_channels(17)
3097 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3098 .qmax(128)
3099 .iterations(3)
3100 .TestQS8();
3101 }
3102
3103 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_without_bias) {
3104 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3105 DeconvolutionOperatorTester()
3106 .has_bias(false)
3107 .batch_size(2)
3108 .input_size(kStridedInputHeight, kStridedInputWidth)
3109 .padding(1)
3110 .kernel_size(3, 3)
3111 .stride(2)
3112 .groups(2)
3113 .group_input_channels(17)
3114 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3115 .iterations(3)
3116 .TestQS8();
3117 }
3118
3119 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_grouped_3x3s2) {
3120 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3121 DeconvolutionOperatorTester()
3122 .batch_size(2)
3123 .input_size(kStridedInputHeight, kStridedInputWidth)
3124 .padding(1)
3125 .kernel_size(3, 3)
3126 .stride(2)
3127 .groups(2)
3128 .group_input_channels(17)
3129 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3130 .use_weights_cache(true)
3131 .iterations(3)
3132 .TestQS8();
3133 }
3134
3135 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
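// The setup tests below reconfigure an already-created operator: the tester runs the
// deconvolution at the initial shape, then applies the next_batch_size / next_input_height /
// next_input_width value and runs it again, so correctness after a post-setup change in batch
// or spatial size is also verified (see TestSetupQS8 in deconvolution-operator-tester.h).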
3136
3137 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_setup_changing_batch) {
3138 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3139 DeconvolutionOperatorTester()
3140 .batch_size(2)
3141 .next_batch_size(5)
3142 .input_size(kStridedInputHeight, kStridedInputWidth)
3143 .kernel_height(3)
3144 .kernel_width(5)
3145 .stride(2)
3146 .groups(2)
3147 .group_input_channels(15)
3148 .group_output_channels(17)
3149 .TestSetupQS8();
3150 }
3151
3152 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_setup_changing_height) {
3153 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3154 DeconvolutionOperatorTester()
3155 .batch_size(2)
3156 .input_size(kStridedInputHeight, kStridedInputWidth)
3157 .next_input_height(kStridedInputHeight + 3)
3158 .kernel_height(3)
3159 .kernel_width(5)
3160 .stride(2)
3161 .groups(2)
3162 .group_input_channels(15)
3163 .group_output_channels(17)
3164 .TestSetupQS8();
3165 }
3166
3167 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_setup_changing_width) {
3168 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3169 DeconvolutionOperatorTester()
3170 .batch_size(2)
3171 .input_size(kStridedInputHeight, kStridedInputWidth)
3172 .next_input_width(kStridedInputWidth + 3)
3173 .kernel_height(3)
3174 .kernel_width(5)
3175 .stride(2)
3176 .groups(2)
3177 .group_input_channels(15)
3178 .group_output_channels(17)
3179 .TestSetupQS8();
3180 }
3181
3182 /**************************** SUBCONV2D/GEMM path ****************************/
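// In the tests below the stride equals the kernel size (2x2 kernel with stride 2, or Kx2/2xK
// variants where each kernel dimension matches its stride), so deconvolution windows do not
// overlap and every output pixel depends on exactly one input pixel; this non-overlapping case
// is presumably what the SUBCONV2D/GEMM label refers to, in contrast to the IGEMM variant used
// when the kernel is larger than the stride.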
3183
3184 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2) {
3185 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3186 DeconvolutionOperatorTester()
3187 .input_size(kStridedInputHeight, kStridedInputWidth)
3188 .kernel_size(2, 2)
3189 .stride(2)
3190 .group_input_channels(15)
3191 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3192 .iterations(3)
3193 .TestQS8();
3194 }
3195
3196 TEST(DECONVOLUTION_NHWC_QS8, Kx2sKx2) {
3197 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3198 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3199 DeconvolutionOperatorTester()
3200 .input_size(kStridedInputHeight, kStridedInputWidth)
3201 .kernel_size(kernel_height, 2)
3202 .stride(kernel_height, 2)
3203 .group_input_channels(17)
3204 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3205 .iterations(3)
3206 .TestQS8();
3207 }
3208 }
3209
3210 TEST(DECONVOLUTION_NHWC_QS8, 2xKs2xK) {
3211 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3212 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3213 DeconvolutionOperatorTester()
3214 .input_size(kStridedInputHeight, kStridedInputWidth)
3215 .kernel_size(2, kernel_width)
3216 .stride(2, kernel_width)
3217 .group_input_channels(17)
3218 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3219 .iterations(3)
3220 .TestQS8();
3221 }
3222 }
3223
3224 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_height_adjustment) {
3225 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3226 DeconvolutionOperatorTester()
3227 .input_size(kStridedInputHeight, kStridedInputWidth)
3228 .adjustment_height(1)
3229 .kernel_size(2, 2)
3230 .stride(2)
3231 .group_input_channels(15)
3232 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3233 .iterations(1)
3234 .TestQS8();
3235 }
3236
3237 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_width_adjustment) {
3238 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3239 DeconvolutionOperatorTester()
3240 .input_size(kStridedInputHeight, kStridedInputWidth)
3241 .adjustment_width(1)
3242 .kernel_size(2, 2)
3243 .stride(2)
3244 .group_input_channels(15)
3245 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3246 .iterations(1)
3247 .TestQS8();
3248 }
3249
3250 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_input_height) {
3251 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3252 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3253 DeconvolutionOperatorTester()
3254 .input_size(input_height, kStridedInputWidth)
3255 .kernel_size(2, 2)
3256 .stride(2)
3257 .group_input_channels(15)
3258 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3259 .iterations(1)
3260 .TestQS8();
3261 }
3262 }
3263
3264 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_input_width) {
3265 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3266 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3267 DeconvolutionOperatorTester()
3268 .input_size(kStridedInputHeight, input_width)
3269 .kernel_size(2, 2)
3270 .stride(2)
3271 .group_input_channels(15)
3272 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3273 .iterations(1)
3274 .TestQS8();
3275 }
3276 }
3277
3278 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_input_channels) {
3279 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3280 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
3281 DeconvolutionOperatorTester()
3282 .input_size(kStridedInputHeight, kStridedInputWidth)
3283 .kernel_size(2, 2)
3284 .stride(2)
3285 .group_input_channels(input_channels)
3286 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3287 .iterations(1)
3288 .TestQS8();
3289 }
3290 }
3291
3292 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_output_channels) {
3293 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3294 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3295 DeconvolutionOperatorTester()
3296 .input_size(kStridedInputHeight, kStridedInputWidth)
3297 .kernel_size(2, 2)
3298 .stride(2)
3299 .group_input_channels(23)
3300 .group_output_channels(output_channels)
3301 .iterations(1)
3302 .TestQS8();
3303 }
3304 }
3305
3306 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_input_stride) {
3307 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3308 DeconvolutionOperatorTester()
3309 .input_size(kStridedInputHeight, kStridedInputWidth)
3310 .kernel_size(2, 2)
3311 .stride(2)
3312 .group_input_channels(23)
3313 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3314 .input_pixel_stride(28)
3315 .iterations(3)
3316 .TestQS8();
3317 }
3318
3319 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_output_stride) {
3320 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3321 DeconvolutionOperatorTester()
3322 .input_size(kStridedInputHeight, kStridedInputWidth)
3323 .kernel_size(2, 2)
3324 .stride(2)
3325 .group_input_channels(23)
3326 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3327 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3328 .iterations(3)
3329 .TestQS8();
3330 }
3331
3332 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_qmin) {
3333 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3334 DeconvolutionOperatorTester()
3335 .input_size(kStridedInputHeight, kStridedInputWidth)
3336 .kernel_size(2, 2)
3337 .stride(2)
3338 .group_input_channels(23)
3339 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3340 .qmin(128)
3341 .iterations(3)
3342 .TestQS8();
3343 }
3344
3345 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_qmax) {
3346 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3347 DeconvolutionOperatorTester()
3348 .input_size(kStridedInputHeight, kStridedInputWidth)
3349 .kernel_size(2, 2)
3350 .stride(2)
3351 .group_input_channels(23)
3352 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3353 .qmax(128)
3354 .iterations(3)
3355 .TestQS8();
3356 }
3357
3358 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_without_bias) {
3359 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3360 DeconvolutionOperatorTester()
3361 .has_bias(false)
3362 .input_size(kStridedInputHeight, kStridedInputWidth)
3363 .kernel_size(2, 2)
3364 .stride(2)
3365 .group_input_channels(23)
3366 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3367 .iterations(3)
3368 .TestQS8();
3369 }
3370
3371 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_2x2s2) {
3372 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3373 DeconvolutionOperatorTester()
3374 .input_size(kStridedInputHeight, kStridedInputWidth)
3375 .kernel_size(2, 2)
3376 .stride(2)
3377 .group_input_channels(15)
3378 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3379 .use_weights_cache(true)
3380 .iterations(3)
3381 .TestQS8();
3382 }
3383
3384 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
3385
3386 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2) {
3387 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3388 DeconvolutionOperatorTester()
3389 .input_size(kStridedInputHeight, kStridedInputWidth)
3390 .kernel_size(2, 2)
3391 .stride(2)
3392 .groups(2)
3393 .group_input_channels(17)
3394 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3395 .iterations(3)
3396 .TestQS8();
3397 }
3398
3399 TEST(DECONVOLUTION_NHWC_QS8, grouped_Kx2sKx2) {
3400 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3401 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3402 DeconvolutionOperatorTester()
3403 .input_size(kStridedInputHeight, kStridedInputWidth)
3404 .kernel_size(kernel_height, 2)
3405 .stride(kernel_height, 2)
3406 .groups(2)
3407 .group_input_channels(17)
3408 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3409 .iterations(3)
3410 .TestQS8();
3411 }
3412 }
3413
3414 TEST(DECONVOLUTION_NHWC_QS8, grouped_2xKs2xK) {
3415 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3416 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3417 DeconvolutionOperatorTester()
3418 .input_size(kStridedInputHeight, kStridedInputWidth)
3419 .kernel_size(2, kernel_width)
3420 .stride(2, kernel_width)
3421 .groups(2)
3422 .group_input_channels(17)
3423 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3424 .iterations(3)
3425 .TestQS8();
3426 }
3427 }
3428
3429 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_height_adjustment) {
3430 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3431 DeconvolutionOperatorTester()
3432 .input_size(kStridedInputHeight, kStridedInputWidth)
3433 .adjustment_height(1)
3434 .kernel_size(2, 2)
3435 .stride(2)
3436 .groups(2)
3437 .group_input_channels(17)
3438 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3439 .iterations(1)
3440 .TestQS8();
3441 }
3442
3443 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_width_adjustment) {
3444 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3445 DeconvolutionOperatorTester()
3446 .input_size(kStridedInputHeight, kStridedInputWidth)
3447 .adjustment_width(1)
3448 .kernel_size(2, 2)
3449 .stride(2)
3450 .groups(2)
3451 .group_input_channels(17)
3452 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3453 .iterations(1)
3454 .TestQS8();
3455 }
3456
3457 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_input_height) {
3458 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3459 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3460 DeconvolutionOperatorTester()
3461 .input_size(input_height, kStridedInputWidth)
3462 .kernel_size(2, 2)
3463 .stride(2)
3464 .groups(2)
3465 .group_input_channels(17)
3466 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3467 .iterations(1)
3468 .TestQS8();
3469 }
3470 }
3471
3472 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_input_width) {
3473 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3474 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3475 DeconvolutionOperatorTester()
3476 .input_size(kStridedInputHeight, input_width)
3477 .kernel_size(2, 2)
3478 .stride(2)
3479 .groups(2)
3480 .group_input_channels(17)
3481 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3482 .iterations(1)
3483 .TestQS8();
3484 }
3485 }
3486
3487 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_input_channels) {
3488 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3489 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
3490 DeconvolutionOperatorTester()
3491 .input_size(kStridedInputHeight, kStridedInputWidth)
3492 .kernel_size(2, 2)
3493 .stride(2)
3494 .groups(2)
3495 .group_input_channels(input_channels)
3496 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3497 .iterations(1)
3498 .TestQS8();
3499 }
3500 }
3501
3502 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_output_channels) {
3503 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3504 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3505 DeconvolutionOperatorTester()
3506 .input_size(kStridedInputHeight, kStridedInputWidth)
3507 .kernel_size(2, 2)
3508 .stride(2)
3509 .groups(2)
3510 .group_input_channels(17)
3511 .group_output_channels(output_channels)
3512 .iterations(1)
3513 .TestQS8();
3514 }
3515 }
3516
3517 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_input_stride) {
3518 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3519 DeconvolutionOperatorTester()
3520 .input_size(kStridedInputHeight, kStridedInputWidth)
3521 .kernel_size(2, 2)
3522 .stride(2)
3523 .groups(2)
3524 .group_input_channels(17)
3525 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3526 .input_pixel_stride(37)
3527 .iterations(3)
3528 .TestQS8();
3529 }
3530
3531 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_output_stride) {
3532 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3533 DeconvolutionOperatorTester()
3534 .input_size(kStridedInputHeight, kStridedInputWidth)
3535 .kernel_size(2, 2)
3536 .stride(2)
3537 .groups(2)
3538 .group_input_channels(17)
3539 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
3540 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3541 .iterations(3)
3542 .TestQS8();
3543 }
3544
3545 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_qmin) {
3546 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3547 DeconvolutionOperatorTester()
3548 .input_size(kStridedInputHeight, kStridedInputWidth)
3549 .kernel_size(2, 2)
3550 .stride(2)
3551 .groups(2)
3552 .group_input_channels(17)
3553 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3554 .qmin(128)
3555 .iterations(3)
3556 .TestQS8();
3557 }
3558
3559 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_qmax) {
3560 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3561 DeconvolutionOperatorTester()
3562 .input_size(kStridedInputHeight, kStridedInputWidth)
3563 .kernel_size(2, 2)
3564 .stride(2)
3565 .groups(2)
3566 .group_input_channels(17)
3567 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3568 .qmax(128)
3569 .iterations(3)
3570 .TestQS8();
3571 }
3572
3573 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_without_bias) {
3574 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3575 DeconvolutionOperatorTester()
3576 .has_bias(false)
3577 .input_size(kStridedInputHeight, kStridedInputWidth)
3578 .kernel_size(2, 2)
3579 .stride(2)
3580 .groups(2)
3581 .group_input_channels(17)
3582 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3583 .iterations(3)
3584 .TestQS8();
3585 }
3586
3587 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_grouped_2x2s2) {
3588 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3589 DeconvolutionOperatorTester()
3590 .input_size(kStridedInputHeight, kStridedInputWidth)
3591 .kernel_size(2, 2)
3592 .stride(2)
3593 .groups(2)
3594 .group_input_channels(17)
3595 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3596 .use_weights_cache(true)
3597 .iterations(3)
3598 .TestQS8();
3599 }
3600
3601 /**************************** SUBCONV2D/GEMM path, batched ****************************/
3602
3603 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2) {
3604 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3605 DeconvolutionOperatorTester()
3606 .batch_size(2)
3607 .input_size(kStridedInputHeight, kStridedInputWidth)
3608 .kernel_size(2, 2)
3609 .stride(2)
3610 .group_input_channels(15)
3611 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3612 .iterations(3)
3613 .TestQS8();
3614 }
3615
3616 TEST(DECONVOLUTION_NHWC_QS8, batched_Kx2sKx2) {
3617 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3618 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3619 DeconvolutionOperatorTester()
3620 .batch_size(2)
3621 .input_size(kStridedInputHeight, kStridedInputWidth)
3622 .kernel_size(kernel_height, 2)
3623 .stride(kernel_height, 2)
3624 .group_input_channels(17)
3625 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3626 .iterations(3)
3627 .TestQS8();
3628 }
3629 }
3630
3631 TEST(DECONVOLUTION_NHWC_QS8, batched_2xKs2xK) {
3632 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3633 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3634 DeconvolutionOperatorTester()
3635 .batch_size(2)
3636 .input_size(kStridedInputHeight, kStridedInputWidth)
3637 .kernel_size(2, kernel_width)
3638 .stride(2, kernel_width)
3639 .group_input_channels(17)
3640 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3641 .iterations(3)
3642 .TestQS8();
3643 }
3644 }
3645
3646 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_height_adjustment) {
3647 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3648 DeconvolutionOperatorTester()
3649 .batch_size(2)
3650 .input_size(kStridedInputHeight, kStridedInputWidth)
3651 .adjustment_height(1)
3652 .kernel_size(2, 2)
3653 .stride(2)
3654 .group_input_channels(15)
3655 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3656 .iterations(1)
3657 .TestQS8();
3658 }
3659
3660 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_width_adjustment) {
3661 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3662 DeconvolutionOperatorTester()
3663 .batch_size(2)
3664 .input_size(kStridedInputHeight, kStridedInputWidth)
3665 .adjustment_width(1)
3666 .kernel_size(2, 2)
3667 .stride(2)
3668 .group_input_channels(15)
3669 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3670 .iterations(1)
3671 .TestQS8();
3672 }
3673
3674 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_input_height) {
3675 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3676 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3677 DeconvolutionOperatorTester()
3678 .batch_size(2)
3679 .input_size(input_height, kStridedInputWidth)
3680 .kernel_size(2, 2)
3681 .stride(2)
3682 .group_input_channels(15)
3683 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3684 .iterations(1)
3685 .TestQS8();
3686 }
3687 }
3688
3689 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_input_width) {
3690 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3691 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3692 DeconvolutionOperatorTester()
3693 .batch_size(2)
3694 .input_size(kStridedInputHeight, input_width)
3695 .kernel_size(2, 2)
3696 .stride(2)
3697 .group_input_channels(15)
3698 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3699 .iterations(1)
3700 .TestQS8();
3701 }
3702 }
3703
3704 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_input_channels) {
3705 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3706 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
3707 DeconvolutionOperatorTester()
3708 .batch_size(2)
3709 .input_size(kStridedInputHeight, kStridedInputWidth)
3710 .kernel_size(2, 2)
3711 .stride(2)
3712 .group_input_channels(input_channels)
3713 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3714 .iterations(1)
3715 .TestQS8();
3716 }
3717 }
3718
3719 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_output_channels) {
3720 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3721 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3722 DeconvolutionOperatorTester()
3723 .batch_size(2)
3724 .input_size(kStridedInputHeight, kStridedInputWidth)
3725 .kernel_size(2, 2)
3726 .stride(2)
3727 .group_input_channels(23)
3728 .group_output_channels(output_channels)
3729 .iterations(1)
3730 .TestQS8();
3731 }
3732 }
3733
3734 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_input_stride) {
3735 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3736 DeconvolutionOperatorTester()
3737 .batch_size(2)
3738 .input_size(kStridedInputHeight, kStridedInputWidth)
3739 .kernel_size(2, 2)
3740 .stride(2)
3741 .group_input_channels(23)
3742 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3743 .input_pixel_stride(28)
3744 .iterations(3)
3745 .TestQS8();
3746 }
3747
3748 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_output_stride) {
3749 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3750 DeconvolutionOperatorTester()
3751 .batch_size(2)
3752 .input_size(kStridedInputHeight, kStridedInputWidth)
3753 .kernel_size(2, 2)
3754 .stride(2)
3755 .group_input_channels(23)
3756 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3757 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3758 .iterations(3)
3759 .TestQS8();
3760 }
3761
3762 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_qmin) {
3763 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3764 DeconvolutionOperatorTester()
3765 .batch_size(2)
3766 .input_size(kStridedInputHeight, kStridedInputWidth)
3767 .kernel_size(2, 2)
3768 .stride(2)
3769 .group_input_channels(23)
3770 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3771 .qmin(128)
3772 .iterations(3)
3773 .TestQS8();
3774 }
3775
3776 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_qmax) {
3777 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3778 DeconvolutionOperatorTester()
3779 .batch_size(2)
3780 .input_size(kStridedInputHeight, kStridedInputWidth)
3781 .kernel_size(2, 2)
3782 .stride(2)
3783 .group_input_channels(23)
3784 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3785 .qmax(128)
3786 .iterations(3)
3787 .TestQS8();
3788 }
3789
3790 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_without_bias) {
3791 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3792 DeconvolutionOperatorTester()
3793 .has_bias(false)
3794 .batch_size(2)
3795 .input_size(kStridedInputHeight, kStridedInputWidth)
3796 .kernel_size(2, 2)
3797 .stride(2)
3798 .group_input_channels(23)
3799 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3800 .iterations(3)
3801 .TestQS8();
3802 }
3803
3804 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_2x2s2) {
3805 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3806 DeconvolutionOperatorTester()
3807 .batch_size(2)
3808 .input_size(kStridedInputHeight, kStridedInputWidth)
3809 .kernel_size(2, 2)
3810 .stride(2)
3811 .group_input_channels(15)
3812 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3813 .use_weights_cache(true)
3814 .iterations(3)
3815 .TestQS8();
3816 }
3817
3818 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
3819
3820 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2) {
3821 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3822 DeconvolutionOperatorTester()
3823 .batch_size(2)
3824 .input_size(kStridedInputHeight, kStridedInputWidth)
3825 .kernel_size(2, 2)
3826 .stride(2)
3827 .groups(2)
3828 .group_input_channels(17)
3829 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3830 .iterations(3)
3831 .TestQS8();
3832 }
3833
3834 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_Kx2sKx2) {
3835 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3836 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3837 DeconvolutionOperatorTester()
3838 .batch_size(2)
3839 .input_size(kStridedInputHeight, kStridedInputWidth)
3840 .kernel_size(kernel_height, 2)
3841 .stride(kernel_height, 2)
3842 .groups(2)
3843 .group_input_channels(17)
3844 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3845 .iterations(3)
3846 .TestQS8();
3847 }
3848 }
3849
3850 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2xKs2xK) {
3851 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3852 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3853 DeconvolutionOperatorTester()
3854 .batch_size(2)
3855 .input_size(kStridedInputHeight, kStridedInputWidth)
3856 .kernel_size(2, kernel_width)
3857 .stride(2, kernel_width)
3858 .groups(2)
3859 .group_input_channels(17)
3860 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3861 .iterations(3)
3862 .TestQS8();
3863 }
3864 }
3865
3866 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_height_adjustment) {
3867 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3868 DeconvolutionOperatorTester()
3869 .batch_size(2)
3870 .input_size(kStridedInputHeight, kStridedInputWidth)
3871 .adjustment_height(1)
3872 .kernel_size(2, 2)
3873 .stride(2)
3874 .groups(2)
3875 .group_input_channels(17)
3876 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3877 .iterations(1)
3878 .TestQS8();
3879 }
3880
3881 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_width_adjustment) {
3882 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3883 DeconvolutionOperatorTester()
3884 .batch_size(2)
3885 .input_size(kStridedInputHeight, kStridedInputWidth)
3886 .adjustment_width(1)
3887 .kernel_size(2, 2)
3888 .stride(2)
3889 .groups(2)
3890 .group_input_channels(17)
3891 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3892 .iterations(1)
3893 .TestQS8();
3894 }
3895
3896 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_input_height) {
3897 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3898 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3899 DeconvolutionOperatorTester()
3900 .batch_size(2)
3901 .input_size(input_height, kStridedInputWidth)
3902 .kernel_size(2, 2)
3903 .stride(2)
3904 .groups(2)
3905 .group_input_channels(17)
3906 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3907 .iterations(1)
3908 .TestQS8();
3909 }
3910 }
3911
3912 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_input_width) {
3913 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3914 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3915 DeconvolutionOperatorTester()
3916 .batch_size(2)
3917 .input_size(kStridedInputHeight, input_width)
3918 .kernel_size(2, 2)
3919 .stride(2)
3920 .groups(2)
3921 .group_input_channels(17)
3922 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3923 .iterations(1)
3924 .TestQS8();
3925 }
3926 }
3927
3928 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_input_channels) {
3929 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3930 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
3931 DeconvolutionOperatorTester()
3932 .batch_size(2)
3933 .input_size(kStridedInputHeight, kStridedInputWidth)
3934 .kernel_size(2, 2)
3935 .stride(2)
3936 .groups(2)
3937 .group_input_channels(input_channels)
3938 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3939 .iterations(1)
3940 .TestQS8();
3941 }
3942 }
3943
3944 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_output_channels) {
3945 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3946 for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3947 DeconvolutionOperatorTester()
3948 .batch_size(2)
3949 .input_size(kStridedInputHeight, kStridedInputWidth)
3950 .kernel_size(2, 2)
3951 .stride(2)
3952 .groups(2)
3953 .group_input_channels(17)
3954 .group_output_channels(output_channels)
3955 .iterations(1)
3956 .TestQS8();
3957 }
3958 }
3959
3960 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_input_stride) {
3961 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3962 DeconvolutionOperatorTester()
3963 .batch_size(2)
3964 .input_size(kStridedInputHeight, kStridedInputWidth)
3965 .kernel_size(2, 2)
3966 .stride(2)
3967 .groups(2)
3968 .group_input_channels(17)
3969 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3970 .input_pixel_stride(37)
3971 .iterations(3)
3972 .TestQS8();
3973 }
3974
3975 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_output_stride) {
3976 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3977 DeconvolutionOperatorTester()
3978 .batch_size(2)
3979 .input_size(kStridedInputHeight, kStridedInputWidth)
3980 .kernel_size(2, 2)
3981 .stride(2)
3982 .groups(2)
3983 .group_input_channels(17)
3984 .group_output_channels(xnn_params.qs8.gemm.nr + 3)
3985 .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3986 .iterations(3)
3987 .TestQS8();
3988 }
3989
3990 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_qmin) {
3991 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3992 DeconvolutionOperatorTester()
3993 .batch_size(2)
3994 .input_size(kStridedInputHeight, kStridedInputWidth)
3995 .kernel_size(2, 2)
3996 .stride(2)
3997 .groups(2)
3998 .group_input_channels(17)
3999 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4000 .qmin(128)
4001 .iterations(3)
4002 .TestQS8();
4003 }
4004
4005 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_qmax) {
4006 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4007 DeconvolutionOperatorTester()
4008 .batch_size(2)
4009 .input_size(kStridedInputHeight, kStridedInputWidth)
4010 .kernel_size(2, 2)
4011 .stride(2)
4012 .groups(2)
4013 .group_input_channels(17)
4014 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4015 .qmax(128)
4016 .iterations(3)
4017 .TestQS8();
4018 }
4019
4020 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_without_bias) {
4021 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4022 DeconvolutionOperatorTester()
4023 .has_bias(false)
4024 .batch_size(2)
4025 .input_size(kStridedInputHeight, kStridedInputWidth)
4026 .kernel_size(2, 2)
4027 .stride(2)
4028 .groups(2)
4029 .group_input_channels(17)
4030 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4031 .iterations(3)
4032 .TestQS8();
4033 }
4034
4035 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_grouped_2x2s2) {
4036 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4037 DeconvolutionOperatorTester()
4038 .batch_size(2)
4039 .input_size(kStridedInputHeight, kStridedInputWidth)
4040 .kernel_size(2, 2)
4041 .stride(2)
4042 .groups(2)
4043 .group_input_channels(17)
4044 .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4045 .use_weights_cache(true)
4046 .iterations(3)
4047 .TestQS8();
4048 }
4049
4050 /**************************** SUBCONV2D/GEMM path, setup ****************************/
4051
4052 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_setup_changing_batch) {
4053 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4054 DeconvolutionOperatorTester()
4055 .batch_size(2)
4056 .next_batch_size(5)
4057 .input_size(kStridedInputHeight, kStridedInputWidth)
4058 .kernel_size(2, 2)
4059 .stride(2)
4060 .groups(2)
4061 .group_input_channels(15)
4062 .group_output_channels(17)
4063 .TestSetupQS8();
4064 }
4065
4066 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_setup_changing_height) {
4067 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4068 DeconvolutionOperatorTester()
4069 .batch_size(2)
4070 .input_size(kStridedInputHeight, kStridedInputWidth)
4071 .next_input_height(kStridedInputHeight + 3)
4072 .kernel_size(2, 2)
4073 .stride(2)
4074 .groups(2)
4075 .group_input_channels(15)
4076 .group_output_channels(17)
4077 .TestSetupQS8();
4078 }
4079
4080 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_setup_changing_width) {
4081 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4082 DeconvolutionOperatorTester()
4083 .batch_size(2)
4084 .input_size(kStridedInputHeight, kStridedInputWidth)
4085 .next_input_width(kStridedInputWidth + 3)
4086 .kernel_size(2, 2)
4087 .stride(2)
4088 .groups(2)
4089 .group_input_channels(15)
4090 .group_output_channels(17)
4091 .TestSetupQS8();
4092 }
4093
4094 /**************************** Future GEMM path ****************************/
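// A 1x1 kernel with unit stride makes the deconvolution equivalent to a 1x1 convolution, i.e. a
// plain GEMM over all pixels; the "Future GEMM path" label suggests a dedicated GEMM fast path is
// anticipated for this configuration.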
4095
4096 TEST(DECONVOLUTION_NHWC_QU8, 1x1) {
4097 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4098 DeconvolutionOperatorTester()
4099 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4100 .kernel_size(1, 1)
4101 .group_input_channels(23)
4102 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4103 .iterations(3)
4104 .TestQU8();
4105 }
4106
4107 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_input_width) {
4108 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4109 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4110 DeconvolutionOperatorTester()
4111 .input_size(input_height, kUnstridedInputWidth)
4112 .kernel_size(1, 1)
4113 .group_input_channels(23)
4114 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4115 .iterations(1)
4116 .TestQU8();
4117 }
4118 }
4119
4120 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_input_height) {
4121 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4122 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4123 DeconvolutionOperatorTester()
4124 .input_size(kUnstridedInputHeight, input_width)
4125 .kernel_size(1, 1)
4126 .group_input_channels(23)
4127 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4128 .iterations(1)
4129 .TestQU8();
4130 }
4131 }
4132
4133 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_input_channels) {
4134 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4135 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4136 DeconvolutionOperatorTester()
4137 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4138 .kernel_size(1, 1)
4139 .group_input_channels(input_channels)
4140 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4141 .iterations(1)
4142 .TestQU8();
4143 }
4144 }
4145
4146 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_output_channels) {
4147 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4148 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4149 DeconvolutionOperatorTester()
4150 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4151 .kernel_size(1, 1)
4152 .group_input_channels(23)
4153 .group_output_channels(output_channels)
4154 .iterations(1)
4155 .TestQU8();
4156 }
4157 }
4158
4159 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_input_stride) {
4160 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4161 DeconvolutionOperatorTester()
4162 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4163 .kernel_size(1, 1)
4164 .group_input_channels(23)
4165 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4166 .input_pixel_stride(28)
4167 .iterations(3)
4168 .TestQU8();
4169 }
4170
4171 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_output_stride) {
4172 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4173 DeconvolutionOperatorTester()
4174 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4175 .kernel_size(1, 1)
4176 .group_input_channels(23)
4177 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4178 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4179 .iterations(3)
4180 .TestQU8();
4181 }
4182
4183 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_qmin) {
4184 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4185 DeconvolutionOperatorTester()
4186 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4187 .kernel_size(1, 1)
4188 .group_input_channels(23)
4189 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4190 .qmin(128)
4191 .iterations(3)
4192 .TestQU8();
4193 }
4194
4195 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_qmax) {
4196 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4197 DeconvolutionOperatorTester()
4198 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4199 .kernel_size(1, 1)
4200 .group_input_channels(23)
4201 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4202 .qmax(128)
4203 .iterations(3)
4204 .TestQU8();
4205 }
4206
4207 TEST(DECONVOLUTION_NHWC_QU8, 1x1_without_bias) {
4208 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4209 DeconvolutionOperatorTester()
4210 .has_bias(false)
4211 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4212 .kernel_size(1, 1)
4213 .group_input_channels(23)
4214 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4215 .iterations(3)
4216 .TestQU8();
4217 }
4218
4219 /**************************** Future GEMM path, grouped ****************************/
4220
4221 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1) {
4222 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4223 DeconvolutionOperatorTester()
4224 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4225 .kernel_size(1, 1)
4226 .groups(2)
4227 .group_input_channels(23)
4228 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4229 .iterations(3)
4230 .TestQU8();
4231 }
4232
4233 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_input_width) {
4234 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4235 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4236 DeconvolutionOperatorTester()
4237 .input_size(input_height, kUnstridedInputWidth)
4238 .kernel_size(1, 1)
4239 .groups(2)
4240 .group_input_channels(23)
4241 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4242 .iterations(1)
4243 .TestQU8();
4244 }
4245 }
4246
4247 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_input_height) {
4248 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4249 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4250 DeconvolutionOperatorTester()
4251 .input_size(kUnstridedInputHeight, input_width)
4252 .kernel_size(1, 1)
4253 .groups(2)
4254 .group_input_channels(23)
4255 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4256 .iterations(1)
4257 .TestQU8();
4258 }
4259 }
4260
4261 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_input_channels) {
4262 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4263 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4264 DeconvolutionOperatorTester()
4265 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4266 .kernel_size(1, 1)
4267 .groups(2)
4268 .group_input_channels(input_channels)
4269 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4270 .iterations(1)
4271 .TestQU8();
4272 }
4273 }
4274
4275 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_output_channels) {
4276 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4277 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4278 DeconvolutionOperatorTester()
4279 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4280 .kernel_size(1, 1)
4281 .groups(2)
4282 .group_input_channels(23)
4283 .group_output_channels(output_channels)
4284 .iterations(1)
4285 .TestQU8();
4286 }
4287 }
4288
4289 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_input_stride) {
4290 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4291 DeconvolutionOperatorTester()
4292 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4293 .kernel_size(1, 1)
4294 .groups(2)
4295 .group_input_channels(23)
4296 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4297 .input_pixel_stride(47)
4298 .iterations(3)
4299 .TestQU8();
4300 }
4301
4302 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_output_stride) {
4303 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4304 DeconvolutionOperatorTester()
4305 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4306 .kernel_size(1, 1)
4307 .groups(2)
4308 .group_input_channels(23)
4309 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
4310 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4311 .iterations(3)
4312 .TestQU8();
4313 }
4314
4315 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_qmin) {
4316 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4317 DeconvolutionOperatorTester()
4318 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4319 .kernel_size(1, 1)
4320 .groups(2)
4321 .group_input_channels(23)
4322 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4323 .qmin(128)
4324 .iterations(3)
4325 .TestQU8();
4326 }
4327
4328 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_qmax) {
4329 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4330 DeconvolutionOperatorTester()
4331 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4332 .kernel_size(1, 1)
4333 .groups(2)
4334 .group_input_channels(23)
4335 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4336 .qmax(128)
4337 .iterations(3)
4338 .TestQU8();
4339 }
4340
4341 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_without_bias) {
4342 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4343 DeconvolutionOperatorTester()
4344 .has_bias(false)
4345 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4346 .kernel_size(1, 1)
4347 .groups(2)
4348 .group_input_channels(23)
4349 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4350 .iterations(3)
4351 .TestQU8();
4352 }
4353
4354 /**************************** Future GEMM path, batched ****************************/
4355
4356 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1) {
4357 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4358 DeconvolutionOperatorTester()
4359 .batch_size(2)
4360 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4361 .kernel_size(1, 1)
4362 .group_input_channels(23)
4363 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4364 .iterations(3)
4365 .TestQU8();
4366 }
4367
4368 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_input_width) {
4369 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4370 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4371 DeconvolutionOperatorTester()
4372 .batch_size(2)
4373 .input_size(input_height, kUnstridedInputWidth)
4374 .kernel_size(1, 1)
4375 .group_input_channels(23)
4376 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4377 .iterations(1)
4378 .TestQU8();
4379 }
4380 }
4381
4382 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_input_height) {
4383 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4384 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4385 DeconvolutionOperatorTester()
4386 .batch_size(2)
4387 .input_size(kUnstridedInputHeight, input_width)
4388 .kernel_size(1, 1)
4389 .group_input_channels(23)
4390 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4391 .iterations(1)
4392 .TestQU8();
4393 }
4394 }
4395
4396 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_input_channels) {
4397 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4398 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4399 DeconvolutionOperatorTester()
4400 .batch_size(2)
4401 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4402 .kernel_size(1, 1)
4403 .group_input_channels(input_channels)
4404 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4405 .iterations(1)
4406 .TestQU8();
4407 }
4408 }
4409
4410 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_output_channels) {
4411 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4412 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4413 DeconvolutionOperatorTester()
4414 .batch_size(2)
4415 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4416 .kernel_size(1, 1)
4417 .group_input_channels(23)
4418 .group_output_channels(output_channels)
4419 .iterations(1)
4420 .TestQU8();
4421 }
4422 }
4423
4424 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_input_stride) {
4425 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4426 DeconvolutionOperatorTester()
4427 .batch_size(2)
4428 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4429 .kernel_size(1, 1)
4430 .group_input_channels(23)
4431 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4432 .input_pixel_stride(28)
4433 .iterations(3)
4434 .TestQU8();
4435 }
4436
4437 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_output_stride) {
4438 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4439 DeconvolutionOperatorTester()
4440 .batch_size(2)
4441 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4442 .kernel_size(1, 1)
4443 .group_input_channels(23)
4444 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4445 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4446 .iterations(3)
4447 .TestQU8();
4448 }
4449
4450 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_qmin) {
4451 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4452 DeconvolutionOperatorTester()
4453 .batch_size(2)
4454 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4455 .kernel_size(1, 1)
4456 .group_input_channels(23)
4457 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4458 .qmin(128)
4459 .iterations(3)
4460 .TestQU8();
4461 }
4462
4463 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_qmax) {
4464 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4465 DeconvolutionOperatorTester()
4466 .batch_size(2)
4467 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4468 .kernel_size(1, 1)
4469 .group_input_channels(23)
4470 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4471 .qmax(128)
4472 .iterations(3)
4473 .TestQU8();
4474 }
4475
4476 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_without_bias) {
4477 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4478 DeconvolutionOperatorTester()
4479 .has_bias(false)
4480 .batch_size(2)
4481 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4482 .kernel_size(1, 1)
4483 .group_input_channels(23)
4484 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4485 .iterations(3)
4486 .TestQU8();
4487 }
4488
4489 /**************************** Future GEMM path, batched, grouped ****************************/
4490
4491 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1) {
4492 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4493 DeconvolutionOperatorTester()
4494 .batch_size(2)
4495 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4496 .kernel_size(1, 1)
4497 .groups(2)
4498 .group_input_channels(23)
4499 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4500 .iterations(3)
4501 .TestQU8();
4502 }
4503
4504 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_input_width) {
4505 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4506 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4507 DeconvolutionOperatorTester()
4508 .batch_size(2)
4509 .input_size(input_height, kUnstridedInputWidth)
4510 .kernel_size(1, 1)
4511 .groups(2)
4512 .group_input_channels(23)
4513 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4514 .iterations(1)
4515 .TestQU8();
4516 }
4517 }
4518
4519 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_input_height) {
4520 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4521 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4522 DeconvolutionOperatorTester()
4523 .batch_size(2)
4524 .input_size(kUnstridedInputHeight, input_width)
4525 .kernel_size(1, 1)
4526 .groups(2)
4527 .group_input_channels(23)
4528 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4529 .iterations(1)
4530 .TestQU8();
4531 }
4532 }
4533
4534 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_input_channels) {
4535 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4536 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4537 DeconvolutionOperatorTester()
4538 .batch_size(2)
4539 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4540 .kernel_size(1, 1)
4541 .groups(2)
4542 .group_input_channels(input_channels)
4543 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4544 .iterations(1)
4545 .TestQU8();
4546 }
4547 }
4548
4549 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_output_channels) {
4550 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4551 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4552 DeconvolutionOperatorTester()
4553 .batch_size(2)
4554 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4555 .kernel_size(1, 1)
4556 .groups(2)
4557 .group_input_channels(23)
4558 .group_output_channels(output_channels)
4559 .iterations(1)
4560 .TestQU8();
4561 }
4562 }
4563
4564 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_input_stride) {
4565 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4566 DeconvolutionOperatorTester()
4567 .batch_size(2)
4568 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4569 .kernel_size(1, 1)
4570 .groups(2)
4571 .group_input_channels(23)
4572 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4573 .input_pixel_stride(47)
4574 .iterations(3)
4575 .TestQU8();
4576 }
4577
4578 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_output_stride) {
4579 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4580 DeconvolutionOperatorTester()
4581 .batch_size(2)
4582 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4583 .kernel_size(1, 1)
4584 .groups(2)
4585 .group_input_channels(23)
4586 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
4587 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4588 .iterations(3)
4589 .TestQU8();
4590 }
4591
4592 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_qmin) {
4593 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4594 DeconvolutionOperatorTester()
4595 .batch_size(2)
4596 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4597 .kernel_size(1, 1)
4598 .groups(2)
4599 .group_input_channels(23)
4600 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4601 .qmin(128)
4602 .iterations(3)
4603 .TestQU8();
4604 }
4605
4606 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_qmax) {
4607 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4608 DeconvolutionOperatorTester()
4609 .batch_size(2)
4610 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4611 .kernel_size(1, 1)
4612 .groups(2)
4613 .group_input_channels(23)
4614 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4615 .qmax(128)
4616 .iterations(3)
4617 .TestQU8();
4618 }
4619
4620 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_without_bias) {
4621 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4622 DeconvolutionOperatorTester()
4623 .has_bias(false)
4624 .batch_size(2)
4625 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4626 .kernel_size(1, 1)
4627 .groups(2)
4628 .group_input_channels(23)
4629 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4630 .iterations(3)
4631 .TestQU8();
4632 }
4633
4634 /**************************** CONV path ****************************/
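// Kernels larger than 1x1 at unit stride: these cases presumably go through the operator's
// general convolution (CONV) path, which drives an indirection-based GEMM over each output
// pixel, rather than the single-matrix GEMM specialization used for 1x1 kernels.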
4635
4636 TEST(DECONVOLUTION_NHWC_QU8, 3x3) {
4637 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4638 DeconvolutionOperatorTester()
4639 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4640 .padding(1)
4641 .kernel_size(3, 3)
4642 .group_input_channels(15)
4643 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4644 .iterations(3)
4645 .TestQU8();
4646 }
4647
4648 TEST(DECONVOLUTION_NHWC_QU8, Kx3) {
4649 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4650 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
4651 DeconvolutionOperatorTester()
4652 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4653 .padding_width(1)
4654 .kernel_size(kernel_height, 3)
4655 .group_input_channels(17)
4656 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4657 .iterations(3)
4658 .TestQU8();
4659 }
4660 }
4661
4662 TEST(DECONVOLUTION_NHWC_QU8, 3xK) {
4663 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4664 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
4665 DeconvolutionOperatorTester()
4666 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4667 .padding_height(1)
4668 .kernel_size(3, kernel_width)
4669 .group_input_channels(17)
4670 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4671 .iterations(3)
4672 .TestQU8();
4673 }
4674 }
4675
4676 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_height_padding) {
4677 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4678 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
4679 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
4680 DeconvolutionOperatorTester()
4681 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4682 .padding_width(1)
4683 .padding_top(padding_top)
4684 .padding_bottom(padding_bottom)
4685 .kernel_size(3, 3)
4686 .group_input_channels(15)
4687 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4688 .iterations(1)
4689 .TestQU8();
4690 }
4691 }
4692 }
4693
4694 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_width_padding) {
4695 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4696 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
4697 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
4698 DeconvolutionOperatorTester()
4699 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4700 .padding_height(1)
4701 .padding_left(padding_left)
4702 .padding_right(padding_right)
4703 .kernel_size(3, 3)
4704 .group_input_channels(15)
4705 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4706 .iterations(1)
4707 .TestQU8();
4708 }
4709 }
4710 }
4711
4712 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_height_adjustment) {
4713 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4714 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
4715 DeconvolutionOperatorTester()
4716 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4717 .padding(1)
4718 .stride_height(adjustment_height + 1)
4719 .adjustment_height(adjustment_height)
4720 .kernel_size(3, 3)
4721 .group_input_channels(15)
4722 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4723 .iterations(1)
4724 .TestQU8();
4725 }
4726 }
4727
4728 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_width_adjustment) {
4729 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4730 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
4731 DeconvolutionOperatorTester()
4732 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4733 .padding(1)
4734 .stride_width(adjustment_width + 1)
4735 .adjustment_width(adjustment_width)
4736 .kernel_size(3, 3)
4737 .group_input_channels(15)
4738 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4739 .iterations(1)
4740 .TestQU8();
4741 }
4742 }
4743
4744 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_input_height) {
4745 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4746 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4747 DeconvolutionOperatorTester()
4748 .input_size(input_height, kUnstridedInputWidth)
4749 .padding(1)
4750 .kernel_size(3, 3)
4751 .group_input_channels(15)
4752 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4753 .iterations(1)
4754 .TestQU8();
4755 }
4756 }
4757
4758 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_input_width) {
4759 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4760 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4761 DeconvolutionOperatorTester()
4762 .input_size(kUnstridedInputHeight, input_width)
4763 .padding(1)
4764 .kernel_size(3, 3)
4765 .group_input_channels(15)
4766 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4767 .iterations(1)
4768 .TestQU8();
4769 }
4770 }
4771
4772 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_input_channels) {
4773 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4774 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4775 DeconvolutionOperatorTester()
4776 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4777 .padding(1)
4778 .kernel_size(3, 3)
4779 .group_input_channels(input_channels)
4780 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4781 .iterations(1)
4782 .TestQU8();
4783 }
4784 }
4785
4786 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_output_channels) {
4787 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4788 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4789 DeconvolutionOperatorTester()
4790 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4791 .padding(1)
4792 .kernel_size(3, 3)
4793 .group_input_channels(23)
4794 .group_output_channels(output_channels)
4795 .iterations(1)
4796 .TestQU8();
4797 }
4798 }
4799
4800 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_height_dilation) {
4801 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4802 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
4803 DeconvolutionOperatorTester()
4804 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4805 .padding(1)
4806 .kernel_size(3, 3)
4807 .dilation_height(dilation_height)
4808 .group_input_channels(23)
4809 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4810 .iterations(3)
4811 .TestQU8();
4812 }
4813 }
4814
4815 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_width_dilation) {
4816 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4817 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
4818 DeconvolutionOperatorTester()
4819 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4820 .padding(1)
4821 .kernel_size(3, 3)
4822 .dilation_width(dilation_width)
4823 .group_input_channels(23)
4824 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4825 .iterations(3)
4826 .TestQU8();
4827 }
4828 }
4829
4830 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_height_dilation_and_stride) {
4831 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4832 DeconvolutionOperatorTester()
4833 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4834 .padding(1)
4835 .kernel_size(3, 3)
4836 .dilation_height(3)
4837 .stride_height(2)
4838 .group_input_channels(23)
4839 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4840 .iterations(3)
4841 .TestQU8();
4842 }
4843
4844 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_width_dilation_and_stride) {
4845 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4846 DeconvolutionOperatorTester()
4847 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4848 .padding(1)
4849 .kernel_size(3, 3)
4850 .dilation_width(3)
4851 .stride_width(2)
4852 .group_input_channels(23)
4853 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4854 .iterations(3)
4855 .TestQU8();
4856 }
4857
4858 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_input_stride) {
4859 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4860 DeconvolutionOperatorTester()
4861 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4862 .padding(1)
4863 .kernel_size(3, 3)
4864 .group_input_channels(23)
4865 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4866 .input_pixel_stride(28)
4867 .iterations(3)
4868 .TestQU8();
4869 }
4870
4871 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_output_stride) {
4872 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4873 DeconvolutionOperatorTester()
4874 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4875 .padding(1)
4876 .kernel_size(3, 3)
4877 .group_input_channels(23)
4878 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4879 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4880 .iterations(3)
4881 .TestQU8();
4882 }
4883
4884 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_qmin) {
4885 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4886 DeconvolutionOperatorTester()
4887 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4888 .padding(1)
4889 .kernel_size(3, 3)
4890 .group_input_channels(23)
4891 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4892 .qmin(128)
4893 .iterations(3)
4894 .TestQU8();
4895 }
4896
4897 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_qmax) {
4898 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4899 DeconvolutionOperatorTester()
4900 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4901 .padding(1)
4902 .kernel_size(3, 3)
4903 .group_input_channels(23)
4904 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4905 .qmax(128)
4906 .iterations(3)
4907 .TestQU8();
4908 }
4909
4910 TEST(DECONVOLUTION_NHWC_QU8, 3x3_without_bias) {
4911 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4912 DeconvolutionOperatorTester()
4913 .has_bias(false)
4914 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4915 .padding(1)
4916 .kernel_size(3, 3)
4917 .group_input_channels(23)
4918 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4919 .iterations(3)
4920 .TestQU8();
4921 }
4922
4923 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_3x3) {
4924 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4925 DeconvolutionOperatorTester()
4926 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4927 .padding(1)
4928 .kernel_size(3, 3)
4929 .group_input_channels(15)
4930 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4931 .use_weights_cache(true)
4932 .iterations(3)
4933 .TestQU8();
4934 }
4935
4936 /**************************** CONV path, grouped ****************************/
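// groups(2) splits the channels into two independent convolutions: the operator consumes
// groups * group_input_channels input channels and produces groups * group_output_channels
// output channels, which is why the pixel-stride tests size their strides against the grouped
// totals (e.g. input_pixel_stride(47) >= 2 * 23).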
4937
4938 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3) {
4939 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4940 DeconvolutionOperatorTester()
4941 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4942 .padding(1)
4943 .kernel_size(3, 3)
4944 .groups(2)
4945 .group_input_channels(15)
4946 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4947 .iterations(3)
4948 .TestQU8();
4949 }
4950
4951 TEST(DECONVOLUTION_NHWC_QU8, grouped_Kx3) {
4952 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4953 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
4954 DeconvolutionOperatorTester()
4955 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4956 .padding_width(1)
4957 .kernel_size(kernel_height, 3)
4958 .groups(2)
4959 .group_input_channels(17)
4960 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4961 .iterations(3)
4962 .TestQU8();
4963 }
4964 }
4965
4966 TEST(DECONVOLUTION_NHWC_QU8, grouped_3xK) {
4967 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4968 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
4969 DeconvolutionOperatorTester()
4970 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4971 .padding_height(1)
4972 .kernel_size(3, kernel_width)
4973 .groups(2)
4974 .group_input_channels(17)
4975 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4976 .iterations(3)
4977 .TestQU8();
4978 }
4979 }
4980
4981 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_height_padding) {
4982 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4983 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
4984 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
4985 DeconvolutionOperatorTester()
4986 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4987 .padding_width(1)
4988 .padding_top(padding_top)
4989 .padding_bottom(padding_bottom)
4990 .kernel_size(3, 3)
4991 .groups(2)
4992 .group_input_channels(15)
4993 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4994 .iterations(1)
4995 .TestQU8();
4996 }
4997 }
4998 }
4999
5000 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_width_padding) {
5001 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5002 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
5003 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
5004 DeconvolutionOperatorTester()
5005 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5006 .padding_height(1)
5007 .padding_left(padding_left)
5008 .padding_right(padding_right)
5009 .kernel_size(3, 3)
5010 .groups(2)
5011 .group_input_channels(15)
5012 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5013 .iterations(1)
5014 .TestQU8();
5015 }
5016 }
5017 }
5018
5019 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_height_adjustment) {
5020 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5021 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
5022 DeconvolutionOperatorTester()
5023 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5024 .padding(1)
5025 .stride_height(adjustment_height + 1)
5026 .adjustment_height(adjustment_height)
5027 .kernel_size(3, 3)
5028 .groups(2)
5029 .group_input_channels(15)
5030 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5031 .iterations(1)
5032 .TestQU8();
5033 }
5034 }
5035
5036 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_width_adjustment) {
5037 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5038 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
5039 DeconvolutionOperatorTester()
5040 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5041 .padding(1)
5042 .stride_width(adjustment_width + 1)
5043 .adjustment_width(adjustment_width)
5044 .kernel_size(3, 3)
5045 .groups(2)
5046 .group_input_channels(15)
5047 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5048 .iterations(1)
5049 .TestQU8();
5050 }
5051 }
5052
5053 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_input_height) {
5054 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5055 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
5056 DeconvolutionOperatorTester()
5057 .input_size(input_height, kUnstridedInputWidth)
5058 .padding(1)
5059 .kernel_size(3, 3)
5060 .groups(2)
5061 .group_input_channels(15)
5062 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5063 .iterations(1)
5064 .TestQU8();
5065 }
5066 }
5067
5068 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_input_width) {
5069 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5070 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
5071 DeconvolutionOperatorTester()
5072 .input_size(kUnstridedInputHeight, input_width)
5073 .padding(1)
5074 .kernel_size(3, 3)
5075 .groups(2)
5076 .group_input_channels(15)
5077 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5078 .iterations(1)
5079 .TestQU8();
5080 }
5081 }
5082
5083 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_input_channels) {
5084 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5085 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
5086 DeconvolutionOperatorTester()
5087 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5088 .padding(1)
5089 .kernel_size(3, 3)
5090 .groups(2)
5091 .group_input_channels(input_channels)
5092 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5093 .iterations(1)
5094 .TestQU8();
5095 }
5096 }
5097
5098 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_output_channels) {
5099 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5100 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
5101 DeconvolutionOperatorTester()
5102 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5103 .padding(1)
5104 .kernel_size(3, 3)
5105 .groups(2)
5106 .group_input_channels(23)
5107 .group_output_channels(output_channels)
5108 .iterations(1)
5109 .TestQU8();
5110 }
5111 }
5112
5113 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_height_dilation) {
5114 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5115 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
5116 DeconvolutionOperatorTester()
5117 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5118 .padding(1)
5119 .kernel_size(3, 3)
5120 .dilation_height(dilation_height)
5121 .groups(2)
5122 .group_input_channels(23)
5123 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5124 .iterations(3)
5125 .TestQU8();
5126 }
5127 }
5128
5129 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_width_dilation) {
5130 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5131 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
5132 DeconvolutionOperatorTester()
5133 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5134 .padding(1)
5135 .kernel_size(3, 3)
5136 .dilation_width(dilation_width)
5137 .groups(2)
5138 .group_input_channels(23)
5139 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5140 .iterations(3)
5141 .TestQU8();
5142 }
5143 }
5144
5145 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_height_dilation_and_stride) {
5146 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5147 DeconvolutionOperatorTester()
5148 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5149 .padding(1)
5150 .kernel_size(3, 3)
5151 .dilation_height(3)
5152 .stride_height(2)
5153 .groups(2)
5154 .group_input_channels(23)
5155 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5156 .iterations(3)
5157 .TestQU8();
5158 }
5159
5160 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_width_dilation_and_stride) {
5161 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5162 DeconvolutionOperatorTester()
5163 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5164 .padding(1)
5165 .kernel_size(3, 3)
5166 .dilation_width(3)
5167 .stride_width(2)
5168 .groups(2)
5169 .group_input_channels(23)
5170 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5171 .iterations(3)
5172 .TestQU8();
5173 }
5174
5175 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_input_stride) {
5176 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5177 DeconvolutionOperatorTester()
5178 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5179 .padding(1)
5180 .kernel_size(3, 3)
5181 .groups(2)
5182 .group_input_channels(23)
5183 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5184 .input_pixel_stride(47)
5185 .iterations(3)
5186 .TestQU8();
5187 }
5188
5189 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_output_stride) {
5190 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5191 DeconvolutionOperatorTester()
5192 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5193 .padding(1)
5194 .kernel_size(3, 3)
5195 .groups(2)
5196 .group_input_channels(23)
5197 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
5198 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
5199 .iterations(3)
5200 .TestQU8();
5201 }
5202
5203 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_qmin) {
5204 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5205 DeconvolutionOperatorTester()
5206 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5207 .padding(1)
5208 .kernel_size(3, 3)
5209 .groups(2)
5210 .group_input_channels(23)
5211 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5212 .qmin(128)
5213 .iterations(3)
5214 .TestQU8();
5215 }
5216
5217 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_qmax) {
5218 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5219 DeconvolutionOperatorTester()
5220 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5221 .padding(1)
5222 .kernel_size(3, 3)
5223 .groups(2)
5224 .group_input_channels(23)
5225 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5226 .qmax(128)
5227 .iterations(3)
5228 .TestQU8();
5229 }
5230
5231 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_without_bias) {
5232 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5233 DeconvolutionOperatorTester()
5234 .has_bias(false)
5235 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5236 .padding(1)
5237 .kernel_size(3, 3)
5238 .groups(2)
5239 .group_input_channels(23)
5240 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5241 .iterations(3)
5242 .TestQU8();
5243 }
5244
5245 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_grouped_3x3) {
5246 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5247 DeconvolutionOperatorTester()
5248 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5249 .padding(1)
5250 .kernel_size(3, 3)
5251 .groups(2)
5252 .group_input_channels(15)
5253 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5254 .use_weights_cache(true)
5255 .iterations(3)
5256 .TestQU8();
5257 }
5258
5259 /**************************** CONV path, batched ****************************/
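// Same CONV-path sweep as above with batch_size(2), so every configuration is also verified
// across multiple images in a single operator invocation.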
5260
5261 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3) {
5262 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5263 DeconvolutionOperatorTester()
5264 .batch_size(2)
5265 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5266 .padding(1)
5267 .kernel_size(3, 3)
5268 .group_input_channels(15)
5269 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5270 .iterations(3)
5271 .TestQU8();
5272 }
5273
5274 TEST(DECONVOLUTION_NHWC_QU8, batched_Kx3) {
5275 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5276 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
5277 DeconvolutionOperatorTester()
5278 .batch_size(2)
5279 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5280 .padding_width(1)
5281 .kernel_size(kernel_height, 3)
5282 .group_input_channels(17)
5283 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5284 .iterations(3)
5285 .TestQU8();
5286 }
5287 }
5288
5289 TEST(DECONVOLUTION_NHWC_QU8, batched_3xK) {
5290 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5291 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
5292 DeconvolutionOperatorTester()
5293 .batch_size(2)
5294 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5295 .padding_height(1)
5296 .kernel_size(3, kernel_width)
5297 .group_input_channels(17)
5298 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5299 .iterations(3)
5300 .TestQU8();
5301 }
5302 }
5303
5304 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_height_padding) {
5305 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5306 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
5307 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
5308 DeconvolutionOperatorTester()
5309 .batch_size(2)
5310 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5311 .padding_width(1)
5312 .padding_top(padding_top)
5313 .padding_bottom(padding_bottom)
5314 .kernel_size(3, 3)
5315 .group_input_channels(15)
5316 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5317 .iterations(1)
5318 .TestQU8();
5319 }
5320 }
5321 }
5322
5323 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_width_padding) {
5324 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5325 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
5326 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
5327 DeconvolutionOperatorTester()
5328 .batch_size(2)
5329 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5330 .padding_height(1)
5331 .padding_left(padding_left)
5332 .padding_right(padding_right)
5333 .kernel_size(3, 3)
5334 .group_input_channels(15)
5335 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5336 .iterations(1)
5337 .TestQU8();
5338 }
5339 }
5340 }
5341
5342 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_height_adjustment) {
5343 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5344 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
5345 DeconvolutionOperatorTester()
5346 .batch_size(2)
5347 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5348 .padding(1)
5349 .stride_height(adjustment_height + 1)
5350 .adjustment_height(adjustment_height)
5351 .kernel_size(3, 3)
5352 .group_input_channels(15)
5353 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5354 .iterations(1)
5355 .TestQU8();
5356 }
5357 }
5358
5359 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_width_adjustment) {
5360 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5361 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
5362 DeconvolutionOperatorTester()
5363 .batch_size(2)
5364 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5365 .padding(1)
5366 .stride_width(adjustment_width + 1)
5367 .adjustment_width(adjustment_width)
5368 .kernel_size(3, 3)
5369 .group_input_channels(15)
5370 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5371 .iterations(1)
5372 .TestQU8();
5373 }
5374 }
5375
5376 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_input_height) {
5377 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5378 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
5379 DeconvolutionOperatorTester()
5380 .batch_size(2)
5381 .input_size(input_height, kUnstridedInputWidth)
5382 .padding(1)
5383 .kernel_size(3, 3)
5384 .group_input_channels(15)
5385 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5386 .iterations(1)
5387 .TestQU8();
5388 }
5389 }
5390
5391 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_input_width) {
5392 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5393 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
5394 DeconvolutionOperatorTester()
5395 .batch_size(2)
5396 .input_size(kUnstridedInputHeight, input_width)
5397 .padding(1)
5398 .kernel_size(3, 3)
5399 .group_input_channels(15)
5400 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5401 .iterations(1)
5402 .TestQU8();
5403 }
5404 }
5405
5406 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_input_channels) {
5407 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5408 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
5409 DeconvolutionOperatorTester()
5410 .batch_size(2)
5411 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5412 .padding(1)
5413 .kernel_size(3, 3)
5414 .group_input_channels(input_channels)
5415 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5416 .iterations(1)
5417 .TestQU8();
5418 }
5419 }
5420
5421 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_output_channels) {
5422 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5423 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
5424 DeconvolutionOperatorTester()
5425 .batch_size(2)
5426 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5427 .padding(1)
5428 .kernel_size(3, 3)
5429 .group_input_channels(23)
5430 .group_output_channels(output_channels)
5431 .iterations(1)
5432 .TestQU8();
5433 }
5434 }
5435
5436 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_height_dilation) {
5437 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5438 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
5439 DeconvolutionOperatorTester()
5440 .batch_size(2)
5441 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5442 .padding(1)
5443 .kernel_size(3, 3)
5444 .dilation_height(dilation_height)
5445 .group_input_channels(23)
5446 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5447 .iterations(3)
5448 .TestQU8();
5449 }
5450 }
5451
5452 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_width_dilation) {
5453 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5454 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
5455 DeconvolutionOperatorTester()
5456 .batch_size(2)
5457 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5458 .padding(1)
5459 .kernel_size(3, 3)
5460 .dilation_width(dilation_width)
5461 .group_input_channels(23)
5462 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5463 .iterations(3)
5464 .TestQU8();
5465 }
5466 }
5467
5468 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_height_dilation_and_stride) {
5469 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5470 DeconvolutionOperatorTester()
5471 .batch_size(2)
5472 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5473 .padding(1)
5474 .kernel_size(3, 3)
5475 .dilation_height(3)
5476 .stride_height(2)
5477 .group_input_channels(23)
5478 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5479 .iterations(3)
5480 .TestQU8();
5481 }
5482
5483 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_width_dilation_and_stride) {
5484 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5485 DeconvolutionOperatorTester()
5486 .batch_size(2)
5487 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5488 .padding(1)
5489 .kernel_size(3, 3)
5490 .dilation_width(3)
5491 .stride_width(2)
5492 .group_input_channels(23)
5493 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5494 .iterations(3)
5495 .TestQU8();
5496 }
5497
5498 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_input_stride) {
5499 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5500 DeconvolutionOperatorTester()
5501 .batch_size(2)
5502 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5503 .padding(1)
5504 .kernel_size(3, 3)
5505 .group_input_channels(23)
5506 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5507 .input_pixel_stride(28)
5508 .iterations(3)
5509 .TestQU8();
5510 }
5511
5512 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_output_stride) {
5513 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5514 DeconvolutionOperatorTester()
5515 .batch_size(2)
5516 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5517 .padding(1)
5518 .kernel_size(3, 3)
5519 .group_input_channels(23)
5520 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5521 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
5522 .iterations(3)
5523 .TestQU8();
5524 }
5525
5526 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_qmin) {
5527 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5528 DeconvolutionOperatorTester()
5529 .batch_size(2)
5530 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5531 .padding(1)
5532 .kernel_size(3, 3)
5533 .group_input_channels(23)
5534 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5535 .qmin(128)
5536 .iterations(3)
5537 .TestQU8();
5538 }
5539
5540 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_qmax) {
5541 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5542 DeconvolutionOperatorTester()
5543 .batch_size(2)
5544 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5545 .padding(1)
5546 .kernel_size(3, 3)
5547 .group_input_channels(23)
5548 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5549 .qmax(128)
5550 .iterations(3)
5551 .TestQU8();
5552 }
5553
5554 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_without_bias) {
5555 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5556 DeconvolutionOperatorTester()
5557 .has_bias(false)
5558 .batch_size(2)
5559 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5560 .padding(1)
5561 .kernel_size(3, 3)
5562 .group_input_channels(23)
5563 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5564 .iterations(3)
5565 .TestQU8();
5566 }
5567
5568 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_3x3) {
5569 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5570 DeconvolutionOperatorTester()
5571 .batch_size(2)
5572 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5573 .padding(1)
5574 .kernel_size(3, 3)
5575 .group_input_channels(15)
5576 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5577 .use_weights_cache(true)
5578 .iterations(3)
5579 .TestQU8();
5580 }
5581
5582 /**************************** CONV path, grouped, batched ****************************/
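// Combination of the two previous sections: batch_size(2) and groups(2) applied together.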
5583
5584 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3) {
5585 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5586 DeconvolutionOperatorTester()
5587 .batch_size(2)
5588 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5589 .padding(1)
5590 .kernel_size(3, 3)
5591 .groups(2)
5592 .group_input_channels(15)
5593 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5594 .iterations(3)
5595 .TestQU8();
5596 }
5597
5598 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_Kx3) {
5599 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5600 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
5601 DeconvolutionOperatorTester()
5602 .batch_size(2)
5603 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5604 .padding_width(1)
5605 .kernel_size(kernel_height, 3)
5606 .groups(2)
5607 .group_input_channels(17)
5608 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5609 .iterations(3)
5610 .TestQU8();
5611 }
5612 }
5613
5614 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3xK) {
5615 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5616 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
5617 DeconvolutionOperatorTester()
5618 .batch_size(2)
5619 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5620 .padding_height(1)
5621 .kernel_size(3, kernel_width)
5622 .groups(2)
5623 .group_input_channels(17)
5624 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5625 .iterations(3)
5626 .TestQU8();
5627 }
5628 }
5629
5630 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_height_padding) {
5631 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5632 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
5633 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
5634 DeconvolutionOperatorTester()
5635 .batch_size(2)
5636 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5637 .padding_width(1)
5638 .padding_top(padding_top)
5639 .padding_bottom(padding_bottom)
5640 .kernel_size(3, 3)
5641 .groups(2)
5642 .group_input_channels(15)
5643 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5644 .iterations(1)
5645 .TestQU8();
5646 }
5647 }
5648 }
5649
5650 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_width_padding) {
5651 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5652 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
5653 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
5654 DeconvolutionOperatorTester()
5655 .batch_size(2)
5656 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5657 .padding_height(1)
5658 .padding_left(padding_left)
5659 .padding_right(padding_right)
5660 .kernel_size(3, 3)
5661 .groups(2)
5662 .group_input_channels(15)
5663 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5664 .iterations(1)
5665 .TestQU8();
5666 }
5667 }
5668 }
5669
5670 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_height_adjustment) {
5671 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5672 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
5673 DeconvolutionOperatorTester()
5674 .batch_size(2)
5675 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5676 .padding(1)
5677 .stride_height(adjustment_height + 1)
5678 .adjustment_height(adjustment_height)
5679 .kernel_size(3, 3)
5680 .groups(2)
5681 .group_input_channels(15)
5682 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5683 .iterations(1)
5684 .TestQU8();
5685 }
5686 }
5687
5688 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_width_adjustment) {
5689 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5690 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
5691 DeconvolutionOperatorTester()
5692 .batch_size(2)
5693 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5694 .padding(1)
5695 .stride_width(adjustment_width + 1)
5696 .adjustment_width(adjustment_width)
5697 .kernel_size(3, 3)
5698 .groups(2)
5699 .group_input_channels(15)
5700 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5701 .iterations(1)
5702 .TestQU8();
5703 }
5704 }
5705
5706 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_input_height) {
5707 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5708 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
5709 DeconvolutionOperatorTester()
5710 .batch_size(2)
5711 .input_size(input_height, kUnstridedInputWidth)
5712 .padding(1)
5713 .kernel_size(3, 3)
5714 .groups(2)
5715 .group_input_channels(15)
5716 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5717 .iterations(1)
5718 .TestQU8();
5719 }
5720 }
5721
5722 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_input_width) {
5723 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5724 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
5725 DeconvolutionOperatorTester()
5726 .batch_size(2)
5727 .input_size(kUnstridedInputHeight, input_width)
5728 .padding(1)
5729 .kernel_size(3, 3)
5730 .groups(2)
5731 .group_input_channels(15)
5732 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5733 .iterations(1)
5734 .TestQU8();
5735 }
5736 }
5737
5738 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_input_channels) {
5739 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5740 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
5741 DeconvolutionOperatorTester()
5742 .batch_size(2)
5743 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5744 .padding(1)
5745 .kernel_size(3, 3)
5746 .groups(2)
5747 .group_input_channels(input_channels)
5748 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5749 .iterations(1)
5750 .TestQU8();
5751 }
5752 }
5753
5754 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_output_channels) {
5755 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5756 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
5757 DeconvolutionOperatorTester()
5758 .batch_size(2)
5759 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5760 .padding(1)
5761 .kernel_size(3, 3)
5762 .groups(2)
5763 .group_input_channels(23)
5764 .group_output_channels(output_channels)
5765 .iterations(1)
5766 .TestQU8();
5767 }
5768 }
5769
5770 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_height_dilation) {
5771 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5772 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
5773 DeconvolutionOperatorTester()
5774 .batch_size(2)
5775 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5776 .padding(1)
5777 .kernel_size(3, 3)
5778 .dilation_height(dilation_height)
5779 .groups(2)
5780 .group_input_channels(23)
5781 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5782 .iterations(3)
5783 .TestQU8();
5784 }
5785 }
5786
5787 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_width_dilation) {
5788 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5789 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
5790 DeconvolutionOperatorTester()
5791 .batch_size(2)
5792 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5793 .padding(1)
5794 .kernel_size(3, 3)
5795 .dilation_width(dilation_width)
5796 .groups(2)
5797 .group_input_channels(23)
5798 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5799 .iterations(3)
5800 .TestQU8();
5801 }
5802 }
5803
5804 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_height_dilation_and_stride) {
5805 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5806 DeconvolutionOperatorTester()
5807 .batch_size(2)
5808 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5809 .padding(1)
5810 .kernel_size(3, 3)
5811 .dilation_height(3)
5812 .stride_height(2)
5813 .groups(2)
5814 .group_input_channels(23)
5815 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5816 .iterations(3)
5817 .TestQU8();
5818 }
5819
5820 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_width_dilation_and_stride) {
5821 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5822 DeconvolutionOperatorTester()
5823 .batch_size(2)
5824 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5825 .padding(1)
5826 .kernel_size(3, 3)
5827 .dilation_width(3)
5828 .stride_width(2)
5829 .groups(2)
5830 .group_input_channels(23)
5831 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5832 .iterations(3)
5833 .TestQU8();
5834 }
5835
5836 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_input_stride) {
5837 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5838 DeconvolutionOperatorTester()
5839 .batch_size(2)
5840 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5841 .padding(1)
5842 .kernel_size(3, 3)
5843 .groups(2)
5844 .group_input_channels(23)
5845 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5846 .input_pixel_stride(47)
5847 .iterations(3)
5848 .TestQU8();
5849 }
5850
5851 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_output_stride) {
5852 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5853 DeconvolutionOperatorTester()
5854 .batch_size(2)
5855 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5856 .padding(1)
5857 .kernel_size(3, 3)
5858 .groups(2)
5859 .group_input_channels(23)
5860 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
5861 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
5862 .iterations(3)
5863 .TestQU8();
5864 }
5865
5866 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_qmin) {
5867 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5868 DeconvolutionOperatorTester()
5869 .batch_size(2)
5870 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5871 .padding(1)
5872 .kernel_size(3, 3)
5873 .groups(2)
5874 .group_input_channels(23)
5875 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5876 .qmin(128)
5877 .iterations(3)
5878 .TestQU8();
5879 }
5880
5881 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_qmax) {
5882 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5883 DeconvolutionOperatorTester()
5884 .batch_size(2)
5885 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5886 .padding(1)
5887 .kernel_size(3, 3)
5888 .groups(2)
5889 .group_input_channels(23)
5890 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5891 .qmax(128)
5892 .iterations(3)
5893 .TestQU8();
5894 }
5895
5896 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_without_bias) {
5897 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5898 DeconvolutionOperatorTester()
5899 .has_bias(false)
5900 .batch_size(2)
5901 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5902 .padding(1)
5903 .kernel_size(3, 3)
5904 .groups(2)
5905 .group_input_channels(23)
5906 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5907 .iterations(3)
5908 .TestQU8();
5909 }
5910
5911 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_grouped_3x3) {
5912 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5913 DeconvolutionOperatorTester()
5914 .batch_size(2)
5915 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5916 .padding(1)
5917 .kernel_size(3, 3)
5918 .groups(2)
5919 .group_input_channels(15)
5920 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5921 .use_weights_cache(true)
5922 .iterations(3)
5923 .TestQU8();
5924 }
5925
5926 /**************************** CONV path, setup ****************************/
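// The setup tests pair next_batch_size / next_input_height / next_input_width with
// TestSetupQU8, which presumably creates the operator once, runs it, then calls setup again
// with the new shape and re-verifies the output, so reshaping must work without repacking
// the weights.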
5927
5928 TEST(DECONVOLUTION_NHWC_QU8, 3x3_setup_changing_batch) {
5929 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5930 DeconvolutionOperatorTester()
5931 .batch_size(2)
5932 .next_batch_size(5)
5933 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5934 .kernel_height(3)
5935 .kernel_width(5)
5936 .groups(2)
5937 .group_input_channels(15)
5938 .group_output_channels(17)
5939 .TestSetupQU8();
5940 }
5941
5942 TEST(DECONVOLUTION_NHWC_QU8, 3x3_setup_changing_height) {
5943 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5944 DeconvolutionOperatorTester()
5945 .batch_size(2)
5946 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5947 .next_input_height(kUnstridedInputHeight + 3)
5948 .kernel_height(3)
5949 .kernel_width(5)
5950 .groups(2)
5951 .group_input_channels(15)
5952 .group_output_channels(17)
5953 .TestSetupQU8();
5954 }
5955
5956 TEST(DECONVOLUTION_NHWC_QU8, 3x3_setup_changing_width) {
5957 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5958 DeconvolutionOperatorTester()
5959 .batch_size(2)
5960 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5961 .next_input_width(kUnstridedInputWidth + 3)
5962 .kernel_height(3)
5963 .kernel_width(5)
5964 .groups(2)
5965 .group_input_channels(15)
5966 .group_output_channels(17)
5967 .TestSetupQU8();
5968 }
5969
5970 /**************************** SUBCONV2D/IGEMM path ****************************/
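// Strided cases (stride(2) and friends): these presumably exercise the SUBCONV2D/IGEMM path,
// where the transposed convolution is decomposed into stride_height * stride_width
// sub-convolutions, each evaluated with an indirect GEMM. As a minimal illustrative sketch
// (not part of the tester API; the helper name and exact rounding here are assumptions), the
// expected output extent per dimension for the parameters used below is commonly derived as:
static inline size_t ExpectedDeconvOutputDim(
    size_t input_dim, size_t stride, size_t dilation, size_t kernel_dim,
    size_t total_padding, size_t adjustment) {
  // Effective kernel extent once dilation is applied.
  const size_t dilated_kernel_dim = (kernel_dim - 1) * dilation + 1;
  // Inverse of the convolution output-size relation; assumes total_padding does not exceed
  // the other terms (true for the small paddings used in these tests).
  return stride * (input_dim - 1) + adjustment + dilated_kernel_dim - total_padding;
}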
5971
5972 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2) {
5973 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5974 DeconvolutionOperatorTester()
5975 .input_size(kStridedInputHeight, kStridedInputWidth)
5976 .padding(1)
5977 .kernel_size(3, 3)
5978 .stride(2)
5979 .group_input_channels(15)
5980 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5981 .iterations(3)
5982 .TestQU8();
5983 }
5984
5985 TEST(DECONVOLUTION_NHWC_QU8, Kx3s2) {
5986 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5987 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
5988 DeconvolutionOperatorTester()
5989 .input_size(kStridedInputHeight, kStridedInputWidth)
5990 .padding_width(1)
5991 .kernel_size(kernel_height, 3)
5992 .stride(2)
5993 .group_input_channels(17)
5994 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5995 .iterations(3)
5996 .TestQU8();
5997 }
5998 }
5999
6000 TEST(DECONVOLUTION_NHWC_QU8, 3xKs2) {
6001 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6002 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6003 DeconvolutionOperatorTester()
6004 .input_size(kStridedInputHeight, kStridedInputWidth)
6005 .padding_height(1)
6006 .kernel_size(3, kernel_width)
6007 .stride(2)
6008 .group_input_channels(17)
6009 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6010 .iterations(3)
6011 .TestQU8();
6012 }
6013 }
6014
6015 TEST(DECONVOLUTION_NHWC_QU8, 3x3sSx1) {
6016 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6017 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6018 DeconvolutionOperatorTester()
6019 .input_size(kStridedInputHeight, kStridedInputWidth)
6020 .padding(1)
6021 .padding_width(1)
6022 .kernel_size(3, 3)
6023 .stride_height(stride_height)
6024 .group_input_channels(17)
6025 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6026 .iterations(3)
6027 .TestQU8();
6028 }
6029 }
6030
6031 TEST(DECONVOLUTION_NHWC_QU8, 3x3s1xS) {
6032 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6033 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6034 DeconvolutionOperatorTester()
6035 .input_size(kStridedInputHeight, kStridedInputWidth)
6036 .padding(1)
6037 .padding_width(1)
6038 .kernel_size(3, 3)
6039 .stride_width(stride_width)
6040 .group_input_channels(17)
6041 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6042 .iterations(3)
6043 .TestQU8();
6044 }
6045 }
6046
6047 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_height_padding) {
6048 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6049 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6050 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6051 DeconvolutionOperatorTester()
6052 .input_size(kStridedInputHeight, kStridedInputWidth)
6053 .padding_width(1)
6054 .padding_top(padding_top)
6055 .padding_bottom(padding_bottom)
6056 .kernel_size(3, 3)
6057 .stride(2)
6058 .group_input_channels(15)
6059 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6060 .iterations(1)
6061 .TestQU8();
6062 }
6063 }
6064 }
6065
6066 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_width_padding) {
6067 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6068 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6069 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6070 DeconvolutionOperatorTester()
6071 .input_size(kStridedInputHeight, kStridedInputWidth)
6072 .padding_height(1)
6073 .padding_left(padding_left)
6074 .padding_right(padding_right)
6075 .kernel_size(3, 3)
6076 .stride(2)
6077 .group_input_channels(15)
6078 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6079 .iterations(1)
6080 .TestQU8();
6081 }
6082 }
6083 }
6084
6085 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_height_adjustment) {
6086 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6087 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
6088 DeconvolutionOperatorTester()
6089 .input_size(kStridedInputHeight, kStridedInputWidth)
6090 .padding(1)
6091 .adjustment_height(adjustment_height)
6092 .kernel_size(3, 3)
6093 .stride(2)
6094 .group_input_channels(15)
6095 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6096 .iterations(1)
6097 .TestQU8();
6098 }
6099 }
6100
6101 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_width_adjustment) {
6102 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6103 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
6104 DeconvolutionOperatorTester()
6105 .input_size(kStridedInputHeight, kStridedInputWidth)
6106 .padding(1)
6107 .adjustment_width(adjustment_width)
6108 .kernel_size(3, 3)
6109 .stride(2)
6110 .group_input_channels(15)
6111 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6112 .iterations(1)
6113 .TestQU8();
6114 }
6115 }
6116
6117 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_input_height) {
6118 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6119 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
6120 DeconvolutionOperatorTester()
6121 .input_size(input_height, kStridedInputWidth)
6122 .padding(1)
6123 .kernel_size(3, 3)
6124 .stride(2)
6125 .group_input_channels(15)
6126 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6127 .iterations(1)
6128 .TestQU8();
6129 }
6130 }
6131
6132 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_input_width) {
6133 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6134 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
6135 DeconvolutionOperatorTester()
6136 .input_size(kStridedInputHeight, input_width)
6137 .padding(1)
6138 .kernel_size(3, 3)
6139 .stride(2)
6140 .group_input_channels(15)
6141 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6142 .iterations(1)
6143 .TestQU8();
6144 }
6145 }
6146
6147 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_input_channels) {
6148 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6149 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
6150 DeconvolutionOperatorTester()
6151 .input_size(kStridedInputHeight, kStridedInputWidth)
6152 .padding(1)
6153 .kernel_size(3, 3)
6154 .stride(2)
6155 .group_input_channels(input_channels)
6156 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6157 .iterations(1)
6158 .TestQU8();
6159 }
6160 }
6161
6162 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_output_channels) {
6163 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6164 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
6165 DeconvolutionOperatorTester()
6166 .input_size(kStridedInputHeight, kStridedInputWidth)
6167 .padding(1)
6168 .kernel_size(3, 3)
6169 .stride(2)
6170 .group_input_channels(23)
6171 .group_output_channels(output_channels)
6172 .iterations(1)
6173 .TestQU8();
6174 }
6175 }
6176
6177 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_input_stride) {
6178 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6179 DeconvolutionOperatorTester()
6180 .input_size(kStridedInputHeight, kStridedInputWidth)
6181 .padding(1)
6182 .kernel_size(3, 3)
6183 .stride(2)
6184 .group_input_channels(23)
6185 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6186 .input_pixel_stride(28)
6187 .iterations(3)
6188 .TestQU8();
6189 }
6190
6191 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_output_stride) {
6192 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6193 DeconvolutionOperatorTester()
6194 .input_size(kStridedInputHeight, kStridedInputWidth)
6195 .padding(1)
6196 .kernel_size(3, 3)
6197 .stride(2)
6198 .group_input_channels(23)
6199 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6200 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
6201 .iterations(3)
6202 .TestQU8();
6203 }
6204
6205 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_qmin) {
6206 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6207 DeconvolutionOperatorTester()
6208 .input_size(kStridedInputHeight, kStridedInputWidth)
6209 .padding(1)
6210 .kernel_size(3, 3)
6211 .stride(2)
6212 .group_input_channels(23)
6213 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6214 .qmin(128)
6215 .iterations(3)
6216 .TestQU8();
6217 }
6218
6219 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_qmax) {
6220 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6221 DeconvolutionOperatorTester()
6222 .input_size(kStridedInputHeight, kStridedInputWidth)
6223 .padding(1)
6224 .kernel_size(3, 3)
6225 .stride(2)
6226 .group_input_channels(23)
6227 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6228 .qmax(128)
6229 .iterations(3)
6230 .TestQU8();
6231 }
6232
6233 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_without_bias) {
6234 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6235 DeconvolutionOperatorTester()
6236 .has_bias(false)
6237 .input_size(kStridedInputHeight, kStridedInputWidth)
6238 .padding(1)
6239 .kernel_size(3, 3)
6240 .stride(2)
6241 .group_input_channels(23)
6242 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6243 .iterations(3)
6244 .TestQU8();
6245 }
6246
6247 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_3x3s2) {
6248 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6249 DeconvolutionOperatorTester()
6250 .input_size(kStridedInputHeight, kStridedInputWidth)
6251 .padding(1)
6252 .kernel_size(3, 3)
6253 .stride(2)
6254 .group_input_channels(15)
6255 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6256 .use_weights_cache(true)
6257 .iterations(3)
6258 .TestQU8();
6259 }
6260
6261 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
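// The grouped tests below repeat the strided SUBCONV2D/IGEMM cases with groups(2); the odd
// channel counts are presumably chosen so that both full and partial GEMM tiles per group get exercised.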
6262
6263 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2) {
6264 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6265 DeconvolutionOperatorTester()
6266 .input_size(kStridedInputHeight, kStridedInputWidth)
6267 .padding(1)
6268 .kernel_size(3, 3)
6269 .stride(2)
6270 .groups(2)
6271 .group_input_channels(17)
6272 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6273 .iterations(3)
6274 .TestQU8();
6275 }
6276
6277 TEST(DECONVOLUTION_NHWC_QU8, grouped_Kx3s2) {
6278 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6279 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
6280 DeconvolutionOperatorTester()
6281 .input_size(kStridedInputHeight, kStridedInputWidth)
6282 .padding_width(1)
6283 .kernel_size(kernel_height, 3)
6284 .stride(2)
6285 .groups(2)
6286 .group_input_channels(17)
6287 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6288 .iterations(3)
6289 .TestQU8();
6290 }
6291 }
6292
6293 TEST(DECONVOLUTION_NHWC_QU8, grouped_3xKs2) {
6294 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6295 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6296 DeconvolutionOperatorTester()
6297 .input_size(kStridedInputHeight, kStridedInputWidth)
6298 .padding_height(1)
6299 .kernel_size(3, kernel_width)
6300 .stride(2)
6301 .groups(2)
6302 .group_input_channels(17)
6303 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6304 .iterations(3)
6305 .TestQU8();
6306 }
6307 }
6308
6309 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3sSx1) {
6310 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6311 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6312 DeconvolutionOperatorTester()
6313 .input_size(kStridedInputHeight, kStridedInputWidth)
6314 .padding(1)
6315 .padding_width(1)
6316 .kernel_size(3, 3)
6317 .stride_height(stride_height)
6318 .groups(2)
6319 .group_input_channels(17)
6320 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6321 .iterations(3)
6322 .TestQU8();
6323 }
6324 }
6325
6326 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s1xS) {
6327 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6328 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6329 DeconvolutionOperatorTester()
6330 .input_size(kStridedInputHeight, kStridedInputWidth)
6331 .padding(1)
6332 .padding_width(1)
6333 .kernel_size(3, 3)
6334 .stride_width(stride_width)
6335 .groups(2)
6336 .group_input_channels(17)
6337 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6338 .iterations(3)
6339 .TestQU8();
6340 }
6341 }
6342
6343 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_height_padding) {
6344 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6345 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6346 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6347 DeconvolutionOperatorTester()
6348 .input_size(kStridedInputHeight, kStridedInputWidth)
6349 .padding_width(1)
6350 .padding_top(padding_top)
6351 .padding_bottom(padding_bottom)
6352 .kernel_size(3, 3)
6353 .stride(2)
6354 .groups(2)
6355 .group_input_channels(17)
6356 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6357 .iterations(1)
6358 .TestQU8();
6359 }
6360 }
6361 }
6362
6363 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_width_padding) {
6364 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6365 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6366 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6367 DeconvolutionOperatorTester()
6368 .input_size(kStridedInputHeight, kStridedInputWidth)
6369 .padding_height(1)
6370 .padding_left(padding_left)
6371 .padding_right(padding_right)
6372 .kernel_size(3, 3)
6373 .stride(2)
6374 .groups(2)
6375 .group_input_channels(17)
6376 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6377 .iterations(1)
6378 .TestQU8();
6379 }
6380 }
6381 }
6382
6383 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_height_adjustment) {
6384 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6385 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
6386 DeconvolutionOperatorTester()
6387 .input_size(kStridedInputHeight, kStridedInputWidth)
6388 .padding(1)
6389 .adjustment_height(adjustment_height)
6390 .kernel_size(3, 3)
6391 .stride(2)
6392 .groups(2)
6393 .group_input_channels(17)
6394 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6395 .iterations(1)
6396 .TestQU8();
6397 }
6398 }
6399
6400 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_width_adjustment) {
6401 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6402 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
6403 DeconvolutionOperatorTester()
6404 .input_size(kStridedInputHeight, kStridedInputWidth)
6405 .padding(1)
6406 .adjustment_width(adjustment_width)
6407 .kernel_size(3, 3)
6408 .stride(2)
6409 .groups(2)
6410 .group_input_channels(17)
6411 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6412 .iterations(1)
6413 .TestQU8();
6414 }
6415 }
6416
6417 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_input_height) {
6418 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6419 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
6420 DeconvolutionOperatorTester()
6421 .input_size(input_height, kStridedInputWidth)
6422 .padding(1)
6423 .kernel_size(3, 3)
6424 .stride(2)
6425 .groups(2)
6426 .group_input_channels(17)
6427 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6428 .iterations(1)
6429 .TestQU8();
6430 }
6431 }
6432
6433 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_input_width) {
6434 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6435 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
6436 DeconvolutionOperatorTester()
6437 .input_size(kStridedInputHeight, input_width)
6438 .padding(1)
6439 .kernel_size(3, 3)
6440 .stride(2)
6441 .groups(2)
6442 .group_input_channels(17)
6443 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6444 .iterations(1)
6445 .TestQU8();
6446 }
6447 }
6448
6449 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_input_channels) {
6450 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6451 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
6452 DeconvolutionOperatorTester()
6453 .input_size(kStridedInputHeight, kStridedInputWidth)
6454 .padding(1)
6455 .kernel_size(3, 3)
6456 .stride(2)
6457 .groups(2)
6458 .group_input_channels(input_channels)
6459 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6460 .iterations(1)
6461 .TestQU8();
6462 }
6463 }
6464
6465 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_output_channels) {
6466 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6467 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
6468 DeconvolutionOperatorTester()
6469 .input_size(kStridedInputHeight, kStridedInputWidth)
6470 .padding(1)
6471 .kernel_size(3, 3)
6472 .stride(2)
6473 .groups(2)
6474 .group_input_channels(17)
6475 .group_output_channels(output_channels)
6476 .iterations(1)
6477 .TestQU8();
6478 }
6479 }
6480
6481 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_input_stride) {
6482 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6483 DeconvolutionOperatorTester()
6484 .input_size(kStridedInputHeight, kStridedInputWidth)
6485 .padding(1)
6486 .kernel_size(3, 3)
6487 .stride(2)
6488 .groups(2)
6489 .group_input_channels(17)
6490 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6491 .input_pixel_stride(37)
6492 .iterations(3)
6493 .TestQU8();
6494 }
6495
6496 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_output_stride) {
6497 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6498 DeconvolutionOperatorTester()
6499 .input_size(kStridedInputHeight, kStridedInputWidth)
6500 .padding(1)
6501 .kernel_size(3, 3)
6502 .stride(2)
6503 .groups(2)
6504 .group_input_channels(17)
6505 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
6506 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
6507 .iterations(3)
6508 .TestQU8();
6509 }
6510
6511 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_qmin) {
6512 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6513 DeconvolutionOperatorTester()
6514 .input_size(kStridedInputHeight, kStridedInputWidth)
6515 .padding(1)
6516 .kernel_size(3, 3)
6517 .stride(2)
6518 .groups(2)
6519 .group_input_channels(17)
6520 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6521 .qmin(128)
6522 .iterations(3)
6523 .TestQU8();
6524 }
6525
6526 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_qmax) {
6527 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6528 DeconvolutionOperatorTester()
6529 .input_size(kStridedInputHeight, kStridedInputWidth)
6530 .padding(1)
6531 .kernel_size(3, 3)
6532 .stride(2)
6533 .groups(2)
6534 .group_input_channels(17)
6535 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6536 .qmax(128)
6537 .iterations(3)
6538 .TestQU8();
6539 }
6540
6541 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_without_bias) {
6542 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6543 DeconvolutionOperatorTester()
6544 .has_bias(false)
6545 .input_size(kStridedInputHeight, kStridedInputWidth)
6546 .padding(1)
6547 .kernel_size(3, 3)
6548 .stride(2)
6549 .groups(2)
6550 .group_input_channels(17)
6551 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6552 .iterations(3)
6553 .TestQU8();
6554 }
6555
6556 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_grouped_3x3s2) {
6557 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6558 DeconvolutionOperatorTester()
6559 .input_size(kStridedInputHeight, kStridedInputWidth)
6560 .padding(1)
6561 .kernel_size(3, 3)
6562 .stride(2)
6563 .groups(2)
6564 .group_input_channels(17)
6565 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6566 .use_weights_cache(true)
6567 .iterations(3)
6568 .TestQU8();
6569 }
6570
6571 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
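// The batched tests below add batch_size(2) to the strided cases, covering per-batch input and
// output pointer arithmetic on the SUBCONV2D/IGEMM path.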
6572
6573 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2) {
6574 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6575 DeconvolutionOperatorTester()
6576 .batch_size(2)
6577 .input_size(kStridedInputHeight, kStridedInputWidth)
6578 .padding(1)
6579 .kernel_size(3, 3)
6580 .stride(2)
6581 .group_input_channels(15)
6582 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6583 .iterations(3)
6584 .TestQU8();
6585 }
6586
6587 TEST(DECONVOLUTION_NHWC_QU8, batched_Kx3s2) {
6588 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6589 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
6590 DeconvolutionOperatorTester()
6591 .batch_size(2)
6592 .input_size(kStridedInputHeight, kStridedInputWidth)
6593 .padding_width(1)
6594 .kernel_size(kernel_height, 3)
6595 .stride(2)
6596 .group_input_channels(17)
6597 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6598 .iterations(3)
6599 .TestQU8();
6600 }
6601 }
6602
6603 TEST(DECONVOLUTION_NHWC_QU8, batched_3xKs2) {
6604 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6605 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6606 DeconvolutionOperatorTester()
6607 .batch_size(2)
6608 .input_size(kStridedInputHeight, kStridedInputWidth)
6609 .padding_height(1)
6610 .kernel_size(3, kernel_width)
6611 .stride(2)
6612 .group_input_channels(17)
6613 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6614 .iterations(3)
6615 .TestQU8();
6616 }
6617 }
6618
6619 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3sSx1) {
6620 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6621 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6622 DeconvolutionOperatorTester()
6623 .batch_size(2)
6624 .input_size(kStridedInputHeight, kStridedInputWidth)
6625 .padding(1)
6626 .padding_width(1)
6627 .kernel_size(3, 3)
6628 .stride_height(stride_height)
6629 .group_input_channels(17)
6630 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6631 .iterations(3)
6632 .TestQU8();
6633 }
6634 }
6635
6636 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s1xS) {
6637 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6638 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6639 DeconvolutionOperatorTester()
6640 .batch_size(2)
6641 .input_size(kStridedInputHeight, kStridedInputWidth)
6642 .padding(1)
6643 .padding_width(1)
6644 .kernel_size(3, 3)
6645 .stride_width(stride_width)
6646 .group_input_channels(17)
6647 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6648 .iterations(3)
6649 .TestQU8();
6650 }
6651 }
6652
6653 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_height_padding) {
6654 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6655 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6656 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6657 DeconvolutionOperatorTester()
6658 .batch_size(2)
6659 .input_size(kStridedInputHeight, kStridedInputWidth)
6660 .padding_width(1)
6661 .padding_top(padding_top)
6662 .padding_bottom(padding_bottom)
6663 .kernel_size(3, 3)
6664 .stride(2)
6665 .group_input_channels(15)
6666 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6667 .iterations(1)
6668 .TestQU8();
6669 }
6670 }
6671 }
6672
6673 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_width_padding) {
6674 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6675 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6676 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6677 DeconvolutionOperatorTester()
6678 .batch_size(2)
6679 .input_size(kStridedInputHeight, kStridedInputWidth)
6680 .padding_height(1)
6681 .padding_left(padding_left)
6682 .padding_right(padding_right)
6683 .kernel_size(3, 3)
6684 .stride(2)
6685 .group_input_channels(15)
6686 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6687 .iterations(1)
6688 .TestQU8();
6689 }
6690 }
6691 }
6692
6693 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_height_adjustment) {
6694 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6695 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
6696 DeconvolutionOperatorTester()
6697 .batch_size(2)
6698 .input_size(kStridedInputHeight, kStridedInputWidth)
6699 .padding(1)
6700 .adjustment_height(adjustment_height)
6701 .kernel_size(3, 3)
6702 .stride(2)
6703 .group_input_channels(15)
6704 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6705 .iterations(1)
6706 .TestQU8();
6707 }
6708 }
6709
6710 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_width_adjustment) {
6711 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6712 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
6713 DeconvolutionOperatorTester()
6714 .batch_size(2)
6715 .input_size(kStridedInputHeight, kStridedInputWidth)
6716 .padding(1)
6717 .adjustment_width(adjustment_width)
6718 .kernel_size(3, 3)
6719 .stride(2)
6720 .group_input_channels(15)
6721 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6722 .iterations(1)
6723 .TestQU8();
6724 }
6725 }
6726
6727 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_input_height) {
6728 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6729 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
6730 DeconvolutionOperatorTester()
6731 .batch_size(2)
6732 .input_size(input_height, kStridedInputWidth)
6733 .padding(1)
6734 .kernel_size(3, 3)
6735 .stride(2)
6736 .group_input_channels(15)
6737 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6738 .iterations(1)
6739 .TestQU8();
6740 }
6741 }
6742
6743 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_input_width) {
6744 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6745 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
6746 DeconvolutionOperatorTester()
6747 .batch_size(2)
6748 .input_size(kStridedInputHeight, input_width)
6749 .padding(1)
6750 .kernel_size(3, 3)
6751 .stride(2)
6752 .group_input_channels(15)
6753 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6754 .iterations(1)
6755 .TestQU8();
6756 }
6757 }
6758
6759 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_input_channels) {
6760 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6761 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
6762 DeconvolutionOperatorTester()
6763 .batch_size(2)
6764 .input_size(kStridedInputHeight, kStridedInputWidth)
6765 .padding(1)
6766 .kernel_size(3, 3)
6767 .stride(2)
6768 .group_input_channels(input_channels)
6769 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6770 .iterations(1)
6771 .TestQU8();
6772 }
6773 }
6774
6775 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_output_channels) {
6776 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6777 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
6778 DeconvolutionOperatorTester()
6779 .batch_size(2)
6780 .input_size(kStridedInputHeight, kStridedInputWidth)
6781 .padding(1)
6782 .kernel_size(3, 3)
6783 .stride(2)
6784 .group_input_channels(23)
6785 .group_output_channels(output_channels)
6786 .iterations(1)
6787 .TestQU8();
6788 }
6789 }
6790
6791 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_input_stride) {
6792 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6793 DeconvolutionOperatorTester()
6794 .batch_size(2)
6795 .input_size(kStridedInputHeight, kStridedInputWidth)
6796 .padding(1)
6797 .kernel_size(3, 3)
6798 .stride(2)
6799 .group_input_channels(23)
6800 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6801 .input_pixel_stride(28)
6802 .iterations(3)
6803 .TestQU8();
6804 }
6805
6806 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_output_stride) {
6807 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6808 DeconvolutionOperatorTester()
6809 .batch_size(2)
6810 .input_size(kStridedInputHeight, kStridedInputWidth)
6811 .padding(1)
6812 .kernel_size(3, 3)
6813 .stride(2)
6814 .group_input_channels(23)
6815 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6816 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
6817 .iterations(3)
6818 .TestQU8();
6819 }
6820
6821 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_qmin) {
6822 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6823 DeconvolutionOperatorTester()
6824 .batch_size(2)
6825 .input_size(kStridedInputHeight, kStridedInputWidth)
6826 .padding(1)
6827 .kernel_size(3, 3)
6828 .stride(2)
6829 .group_input_channels(23)
6830 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6831 .qmin(128)
6832 .iterations(3)
6833 .TestQU8();
6834 }
6835
6836 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_qmax) {
6837 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6838 DeconvolutionOperatorTester()
6839 .batch_size(2)
6840 .input_size(kStridedInputHeight, kStridedInputWidth)
6841 .padding(1)
6842 .kernel_size(3, 3)
6843 .stride(2)
6844 .group_input_channels(23)
6845 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6846 .qmax(128)
6847 .iterations(3)
6848 .TestQU8();
6849 }
6850
6851 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_without_bias) {
6852 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6853 DeconvolutionOperatorTester()
6854 .has_bias(false)
6855 .batch_size(2)
6856 .input_size(kStridedInputHeight, kStridedInputWidth)
6857 .padding(1)
6858 .kernel_size(3, 3)
6859 .stride(2)
6860 .group_input_channels(23)
6861 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6862 .iterations(3)
6863 .TestQU8();
6864 }
6865
6866 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_3x3s2) {
6867 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6868 DeconvolutionOperatorTester()
6869 .batch_size(2)
6870 .input_size(kStridedInputHeight, kStridedInputWidth)
6871 .padding(1)
6872 .kernel_size(3, 3)
6873 .stride(2)
6874 .group_input_channels(15)
6875 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6876 .use_weights_cache(true)
6877 .iterations(3)
6878 .TestQU8();
6879 }
6880
6881 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
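// The tests below combine batch_size(2) with groups(2), covering the interaction of batching and
// grouping on the SUBCONV2D/IGEMM path.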
6882
6883 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2) {
6884 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6885 DeconvolutionOperatorTester()
6886 .batch_size(2)
6887 .input_size(kStridedInputHeight, kStridedInputWidth)
6888 .padding(1)
6889 .kernel_size(3, 3)
6890 .stride(2)
6891 .groups(2)
6892 .group_input_channels(17)
6893 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6894 .iterations(3)
6895 .TestQU8();
6896 }
6897
6898 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_Kx3s2) {
6899 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6900 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
6901 DeconvolutionOperatorTester()
6902 .batch_size(2)
6903 .input_size(kStridedInputHeight, kStridedInputWidth)
6904 .padding_width(1)
6905 .kernel_size(kernel_height, 3)
6906 .stride(2)
6907 .groups(2)
6908 .group_input_channels(17)
6909 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6910 .iterations(3)
6911 .TestQU8();
6912 }
6913 }
6914
6915 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3xKs2) {
6916 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6917 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6918 DeconvolutionOperatorTester()
6919 .batch_size(2)
6920 .input_size(kStridedInputHeight, kStridedInputWidth)
6921 .padding_height(1)
6922 .kernel_size(3, kernel_width)
6923 .stride(2)
6924 .groups(2)
6925 .group_input_channels(17)
6926 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6927 .iterations(3)
6928 .TestQU8();
6929 }
6930 }
6931
6932 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3sSx1) {
6933 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6934 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6935 DeconvolutionOperatorTester()
6936 .batch_size(2)
6937 .input_size(kStridedInputHeight, kStridedInputWidth)
6938 .padding(1)
6939 .padding_width(1)
6940 .kernel_size(3, 3)
6941 .stride_height(stride_height)
6942 .groups(2)
6943 .group_input_channels(17)
6944 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6945 .iterations(3)
6946 .TestQU8();
6947 }
6948 }
6949
6950 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s1xS) {
6951 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6952 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6953 DeconvolutionOperatorTester()
6954 .batch_size(2)
6955 .input_size(kStridedInputHeight, kStridedInputWidth)
6956 .padding(1)
6957 .padding_width(1)
6958 .kernel_size(3, 3)
6959 .stride_width(stride_width)
6960 .groups(2)
6961 .group_input_channels(17)
6962 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6963 .iterations(3)
6964 .TestQU8();
6965 }
6966 }
6967
6968 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_height_padding) {
6969 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6970 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6971 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6972 DeconvolutionOperatorTester()
6973 .batch_size(2)
6974 .input_size(kStridedInputHeight, kStridedInputWidth)
6975 .padding_width(1)
6976 .padding_top(padding_top)
6977 .padding_bottom(padding_bottom)
6978 .kernel_size(3, 3)
6979 .stride(2)
6980 .groups(2)
6981 .group_input_channels(17)
6982 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6983 .iterations(1)
6984 .TestQU8();
6985 }
6986 }
6987 }
6988
6989 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_width_padding) {
6990 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6991 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6992 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6993 DeconvolutionOperatorTester()
6994 .batch_size(2)
6995 .input_size(kStridedInputHeight, kStridedInputWidth)
6996 .padding_height(1)
6997 .padding_left(padding_left)
6998 .padding_right(padding_right)
6999 .kernel_size(3, 3)
7000 .stride(2)
7001 .groups(2)
7002 .group_input_channels(17)
7003 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7004 .iterations(1)
7005 .TestQU8();
7006 }
7007 }
7008 }
7009
7010 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_height_adjustment) {
7011 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7012 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
7013 DeconvolutionOperatorTester()
7014 .batch_size(2)
7015 .input_size(kStridedInputHeight, kStridedInputWidth)
7016 .padding(1)
7017 .adjustment_height(adjustment_height)
7018 .kernel_size(3, 3)
7019 .stride(2)
7020 .groups(2)
7021 .group_input_channels(17)
7022 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7023 .iterations(1)
7024 .TestQU8();
7025 }
7026 }
7027
7028 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_width_adjustment) {
7029 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7030 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
7031 DeconvolutionOperatorTester()
7032 .batch_size(2)
7033 .input_size(kStridedInputHeight, kStridedInputWidth)
7034 .padding(1)
7035 .adjustment_width(adjustment_width)
7036 .kernel_size(3, 3)
7037 .stride(2)
7038 .groups(2)
7039 .group_input_channels(17)
7040 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7041 .iterations(1)
7042 .TestQU8();
7043 }
7044 }
7045
7046 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_input_height) {
7047 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7048 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7049 DeconvolutionOperatorTester()
7050 .batch_size(2)
7051 .input_size(input_height, kStridedInputWidth)
7052 .padding(1)
7053 .kernel_size(3, 3)
7054 .stride(2)
7055 .groups(2)
7056 .group_input_channels(17)
7057 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7058 .iterations(1)
7059 .TestQU8();
7060 }
7061 }
7062
7063 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_input_width) {
7064 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7065 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7066 DeconvolutionOperatorTester()
7067 .batch_size(2)
7068 .input_size(kStridedInputHeight, input_width)
7069 .padding(1)
7070 .kernel_size(3, 3)
7071 .stride(2)
7072 .groups(2)
7073 .group_input_channels(17)
7074 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7075 .iterations(1)
7076 .TestQU8();
7077 }
7078 }
7079
7080 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_input_channels) {
7081 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7082 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
7083 DeconvolutionOperatorTester()
7084 .batch_size(2)
7085 .input_size(kStridedInputHeight, kStridedInputWidth)
7086 .padding(1)
7087 .kernel_size(3, 3)
7088 .stride(2)
7089 .groups(2)
7090 .group_input_channels(input_channels)
7091 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7092 .iterations(1)
7093 .TestQU8();
7094 }
7095 }
7096
7097 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_output_channels) {
7098 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7099 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7100 DeconvolutionOperatorTester()
7101 .batch_size(2)
7102 .input_size(kStridedInputHeight, kStridedInputWidth)
7103 .padding(1)
7104 .kernel_size(3, 3)
7105 .stride(2)
7106 .groups(2)
7107 .group_input_channels(17)
7108 .group_output_channels(output_channels)
7109 .iterations(1)
7110 .TestQU8();
7111 }
7112 }
7113
7114 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_input_stride) {
7115 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7116 DeconvolutionOperatorTester()
7117 .batch_size(2)
7118 .input_size(kStridedInputHeight, kStridedInputWidth)
7119 .padding(1)
7120 .kernel_size(3, 3)
7121 .stride(2)
7122 .groups(2)
7123 .group_input_channels(17)
7124 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7125 .input_pixel_stride(37)
7126 .iterations(3)
7127 .TestQU8();
7128 }
7129
7130 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_output_stride) {
7131 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7132 DeconvolutionOperatorTester()
7133 .batch_size(2)
7134 .input_size(kStridedInputHeight, kStridedInputWidth)
7135 .padding(1)
7136 .kernel_size(3, 3)
7137 .stride(2)
7138 .groups(2)
7139 .group_input_channels(17)
7140 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
7141 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7142 .iterations(3)
7143 .TestQU8();
7144 }
7145
7146 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_qmin) {
7147 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7148 DeconvolutionOperatorTester()
7149 .batch_size(2)
7150 .input_size(kStridedInputHeight, kStridedInputWidth)
7151 .padding(1)
7152 .kernel_size(3, 3)
7153 .stride(2)
7154 .groups(2)
7155 .group_input_channels(17)
7156 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7157 .qmin(128)
7158 .iterations(3)
7159 .TestQU8();
7160 }
7161
7162 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_qmax) {
7163 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7164 DeconvolutionOperatorTester()
7165 .batch_size(2)
7166 .input_size(kStridedInputHeight, kStridedInputWidth)
7167 .padding(1)
7168 .kernel_size(3, 3)
7169 .stride(2)
7170 .groups(2)
7171 .group_input_channels(17)
7172 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7173 .qmax(128)
7174 .iterations(3)
7175 .TestQU8();
7176 }
7177
7178 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_without_bias) {
7179 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7180 DeconvolutionOperatorTester()
7181 .has_bias(false)
7182 .batch_size(2)
7183 .input_size(kStridedInputHeight, kStridedInputWidth)
7184 .padding(1)
7185 .kernel_size(3, 3)
7186 .stride(2)
7187 .groups(2)
7188 .group_input_channels(17)
7189 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7190 .iterations(3)
7191 .TestQU8();
7192 }
7193
7194 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_grouped_3x3s2) {
7195 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7196 DeconvolutionOperatorTester()
7197 .batch_size(2)
7198 .input_size(kStridedInputHeight, kStridedInputWidth)
7199 .padding(1)
7200 .kernel_size(3, 3)
7201 .stride(2)
7202 .groups(2)
7203 .group_input_channels(17)
7204 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7205 .use_weights_cache(true)
7206 .iterations(3)
7207 .TestQU8();
7208 }
7209
7210 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
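// The setup tests below create one operator and then re-run it via TestSetupQU8() with a different
// batch size or input shape (next_batch_size / next_input_height / next_input_width).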
7211
7212 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_setup_changing_batch) {
7213 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7214 DeconvolutionOperatorTester()
7215 .batch_size(2)
7216 .next_batch_size(5)
7217 .input_size(kStridedInputHeight, kStridedInputWidth)
7218 .kernel_height(3)
7219 .kernel_width(5)
7220 .stride(2)
7221 .groups(2)
7222 .group_input_channels(15)
7223 .group_output_channels(17)
7224 .TestSetupQU8();
7225 }
7226
7227 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_setup_changing_height) {
7228 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7229 DeconvolutionOperatorTester()
7230 .batch_size(2)
7231 .input_size(kStridedInputHeight, kStridedInputWidth)
7232 .next_input_height(kStridedInputHeight + 3)
7233 .kernel_height(3)
7234 .kernel_width(5)
7235 .stride(2)
7236 .groups(2)
7237 .group_input_channels(15)
7238 .group_output_channels(17)
7239 .TestSetupQU8();
7240 }
7241
7242 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_setup_changing_width) {
7243 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7244 DeconvolutionOperatorTester()
7245 .batch_size(2)
7246 .input_size(kStridedInputHeight, kStridedInputWidth)
7247 .next_input_width(kStridedInputWidth + 3)
7248 .kernel_height(3)
7249 .kernel_width(5)
7250 .stride(2)
7251 .groups(2)
7252 .group_input_channels(15)
7253 .group_output_channels(17)
7254 .TestSetupQU8();
7255 }
7256
7257 /**************************** SUBCONV2D/GEMM path ****************************/
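// In the tests below the stride equals the kernel size (e.g. 2x2 with stride 2), so kernel taps
// never overlap in the output and each subconvolution presumably reduces to a plain GEMM.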
7258
7259 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2) {
7260 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7261 DeconvolutionOperatorTester()
7262 .input_size(kStridedInputHeight, kStridedInputWidth)
7263 .kernel_size(2, 2)
7264 .stride(2)
7265 .group_input_channels(15)
7266 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7267 .iterations(3)
7268 .TestQU8();
7269 }
7270
7271 TEST(DECONVOLUTION_NHWC_QU8, Kx2sKx2) {
7272 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7273 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7274 DeconvolutionOperatorTester()
7275 .input_size(kStridedInputHeight, kStridedInputWidth)
7276 .kernel_size(kernel_height, 2)
7277 .stride(kernel_height, 2)
7278 .group_input_channels(17)
7279 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7280 .iterations(3)
7281 .TestQU8();
7282 }
7283 }
7284
7285 TEST(DECONVOLUTION_NHWC_QU8, 2xKs2xK) {
7286 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7287 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7288 DeconvolutionOperatorTester()
7289 .input_size(kStridedInputHeight, kStridedInputWidth)
7290 .kernel_size(2, kernel_width)
7291 .stride(2, kernel_width)
7292 .group_input_channels(17)
7293 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7294 .iterations(3)
7295 .TestQU8();
7296 }
7297 }
7298
7299 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_height_adjustment) {
7300 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7301 DeconvolutionOperatorTester()
7302 .input_size(kStridedInputHeight, kStridedInputWidth)
7303 .adjustment_height(1)
7304 .kernel_size(2, 2)
7305 .stride(2)
7306 .group_input_channels(15)
7307 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7308 .iterations(1)
7309 .TestQU8();
7310 }
7311
7312 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_width_adjustment) {
7313 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7314 DeconvolutionOperatorTester()
7315 .input_size(kStridedInputHeight, kStridedInputWidth)
7316 .adjustment_width(1)
7317 .kernel_size(2, 2)
7318 .stride(2)
7319 .group_input_channels(15)
7320 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7321 .iterations(1)
7322 .TestQU8();
7323 }
7324
7325 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_input_height) {
7326 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7327 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7328 DeconvolutionOperatorTester()
7329 .input_size(input_height, kStridedInputWidth)
7330 .kernel_size(2, 2)
7331 .stride(2)
7332 .group_input_channels(15)
7333 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7334 .iterations(1)
7335 .TestQU8();
7336 }
7337 }
7338
7339 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_input_width) {
7340 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7341 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7342 DeconvolutionOperatorTester()
7343 .input_size(kStridedInputHeight, input_width)
7344 .kernel_size(2, 2)
7345 .stride(2)
7346 .group_input_channels(15)
7347 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7348 .iterations(1)
7349 .TestQU8();
7350 }
7351 }
7352
7353 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_input_channels) {
7354 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7355 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
7356 DeconvolutionOperatorTester()
7357 .input_size(kStridedInputHeight, kStridedInputWidth)
7358 .kernel_size(2, 2)
7359 .stride(2)
7360 .group_input_channels(input_channels)
7361 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7362 .iterations(1)
7363 .TestQU8();
7364 }
7365 }
7366
7367 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_output_channels) {
7368 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7369 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7370 DeconvolutionOperatorTester()
7371 .input_size(kStridedInputHeight, kStridedInputWidth)
7372 .kernel_size(2, 2)
7373 .stride(2)
7374 .group_input_channels(23)
7375 .group_output_channels(output_channels)
7376 .iterations(1)
7377 .TestQU8();
7378 }
7379 }
7380
7381 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_input_stride) {
7382 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7383 DeconvolutionOperatorTester()
7384 .input_size(kStridedInputHeight, kStridedInputWidth)
7385 .kernel_size(2, 2)
7386 .stride(2)
7387 .group_input_channels(23)
7388 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7389 .input_pixel_stride(28)
7390 .iterations(3)
7391 .TestQU8();
7392 }
7393
7394 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_output_stride) {
7395 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7396 DeconvolutionOperatorTester()
7397 .input_size(kStridedInputHeight, kStridedInputWidth)
7398 .kernel_size(2, 2)
7399 .stride(2)
7400 .group_input_channels(23)
7401 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7402 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7403 .iterations(3)
7404 .TestQU8();
7405 }
7406
7407 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_qmin) {
7408 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7409 DeconvolutionOperatorTester()
7410 .input_size(kStridedInputHeight, kStridedInputWidth)
7411 .kernel_size(2, 2)
7412 .stride(2)
7413 .group_input_channels(23)
7414 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7415 .qmin(128)
7416 .iterations(3)
7417 .TestQU8();
7418 }
7419
7420 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_qmax) {
7421 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7422 DeconvolutionOperatorTester()
7423 .input_size(kStridedInputHeight, kStridedInputWidth)
7424 .kernel_size(2, 2)
7425 .stride(2)
7426 .group_input_channels(23)
7427 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7428 .qmax(128)
7429 .iterations(3)
7430 .TestQU8();
7431 }
7432
7433 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_without_bias) {
7434 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7435 DeconvolutionOperatorTester()
7436 .has_bias(false)
7437 .input_size(kStridedInputHeight, kStridedInputWidth)
7438 .kernel_size(2, 2)
7439 .stride(2)
7440 .group_input_channels(23)
7441 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7442 .iterations(3)
7443 .TestQU8();
7444 }
7445
7446 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_2x2s2) {
7447 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7448 DeconvolutionOperatorTester()
7449 .input_size(kStridedInputHeight, kStridedInputWidth)
7450 .kernel_size(2, 2)
7451 .stride(2)
7452 .group_input_channels(15)
7453 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7454 .use_weights_cache(true)
7455 .iterations(3)
7456 .TestQU8();
7457 }
7458
7459 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
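// Grouped variants of the stride-equals-kernel cases above, with groups(2).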
7460
7461 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2) {
7462 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7463 DeconvolutionOperatorTester()
7464 .input_size(kStridedInputHeight, kStridedInputWidth)
7465 .kernel_size(2, 2)
7466 .stride(2)
7467 .groups(2)
7468 .group_input_channels(17)
7469 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7470 .iterations(3)
7471 .TestQU8();
7472 }
7473
7474 TEST(DECONVOLUTION_NHWC_QU8, grouped_Kx2sKx2) {
7475 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7476 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7477 DeconvolutionOperatorTester()
7478 .input_size(kStridedInputHeight, kStridedInputWidth)
7479 .kernel_size(kernel_height, 2)
7480 .stride(kernel_height, 2)
7481 .groups(2)
7482 .group_input_channels(17)
7483 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7484 .iterations(3)
7485 .TestQU8();
7486 }
7487 }
7488
7489 TEST(DECONVOLUTION_NHWC_QU8, grouped_2xKs2xK) {
7490 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7491 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7492 DeconvolutionOperatorTester()
7493 .input_size(kStridedInputHeight, kStridedInputWidth)
7494 .kernel_size(2, kernel_width)
7495 .stride(2, kernel_width)
7496 .groups(2)
7497 .group_input_channels(17)
7498 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7499 .iterations(3)
7500 .TestQU8();
7501 }
7502 }
7503
7504 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_height_adjustment) {
7505 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7506 DeconvolutionOperatorTester()
7507 .input_size(kStridedInputHeight, kStridedInputWidth)
7508 .adjustment_height(1)
7509 .kernel_size(2, 2)
7510 .stride(2)
7511 .groups(2)
7512 .group_input_channels(17)
7513 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7514 .iterations(1)
7515 .TestQU8();
7516 }
7517
7518 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_width_adjustment) {
7519 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7520 DeconvolutionOperatorTester()
7521 .input_size(kStridedInputHeight, kStridedInputWidth)
7522 .adjustment_width(1)
7523 .kernel_size(2, 2)
7524 .stride(2)
7525 .groups(2)
7526 .group_input_channels(17)
7527 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7528 .iterations(1)
7529 .TestQU8();
7530 }
7531
7532 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_input_height) {
7533 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7534 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7535 DeconvolutionOperatorTester()
7536 .input_size(input_height, kStridedInputWidth)
7537 .kernel_size(2, 2)
7538 .stride(2)
7539 .groups(2)
7540 .group_input_channels(17)
7541 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7542 .iterations(1)
7543 .TestQU8();
7544 }
7545 }
7546
7547 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_input_width) {
7548 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7549 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7550 DeconvolutionOperatorTester()
7551 .input_size(kStridedInputHeight, input_width)
7552 .kernel_size(2, 2)
7553 .stride(2)
7554 .groups(2)
7555 .group_input_channels(17)
7556 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7557 .iterations(1)
7558 .TestQU8();
7559 }
7560 }
7561
7562 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_input_channels) {
7563 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7564 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
7565 DeconvolutionOperatorTester()
7566 .input_size(kStridedInputHeight, kStridedInputWidth)
7567 .kernel_size(2, 2)
7568 .stride(2)
7569 .groups(2)
7570 .group_input_channels(input_channels)
7571 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7572 .iterations(1)
7573 .TestQU8();
7574 }
7575 }
7576
7577 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_output_channels) {
7578 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7579 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7580 DeconvolutionOperatorTester()
7581 .input_size(kStridedInputHeight, kStridedInputWidth)
7582 .kernel_size(2, 2)
7583 .stride(2)
7584 .groups(2)
7585 .group_input_channels(17)
7586 .group_output_channels(output_channels)
7587 .iterations(1)
7588 .TestQU8();
7589 }
7590 }
7591
7592 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_input_stride) {
7593 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7594 DeconvolutionOperatorTester()
7595 .input_size(kStridedInputHeight, kStridedInputWidth)
7596 .kernel_size(2, 2)
7597 .stride(2)
7598 .groups(2)
7599 .group_input_channels(17)
7600 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7601 .input_pixel_stride(37)
7602 .iterations(3)
7603 .TestQU8();
7604 }
7605
7606 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_output_stride) {
7607 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7608 DeconvolutionOperatorTester()
7609 .input_size(kStridedInputHeight, kStridedInputWidth)
7610 .kernel_size(2, 2)
7611 .stride(2)
7612 .groups(2)
7613 .group_input_channels(17)
7614 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
7615 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7616 .iterations(3)
7617 .TestQU8();
7618 }
7619
7620 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_qmin) {
7621 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7622 DeconvolutionOperatorTester()
7623 .input_size(kStridedInputHeight, kStridedInputWidth)
7624 .kernel_size(2, 2)
7625 .stride(2)
7626 .groups(2)
7627 .group_input_channels(17)
7628 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7629 .qmin(128)
7630 .iterations(3)
7631 .TestQU8();
7632 }
7633
7634 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_qmax) {
7635 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7636 DeconvolutionOperatorTester()
7637 .input_size(kStridedInputHeight, kStridedInputWidth)
7638 .kernel_size(2, 2)
7639 .stride(2)
7640 .groups(2)
7641 .group_input_channels(17)
7642 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7643 .qmax(128)
7644 .iterations(3)
7645 .TestQU8();
7646 }
7647
TEST(DECONVOLUTION_NHWC_QU8,grouped_2x2s2_without_bias)7648 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_without_bias) {
7649 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7650 DeconvolutionOperatorTester()
7651 .has_bias(false)
7652 .input_size(kStridedInputHeight, kStridedInputWidth)
7653 .kernel_size(2, 2)
7654 .stride(2)
7655 .groups(2)
7656 .group_input_channels(17)
7657 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7658 .iterations(3)
7659 .TestQU8();
7660 }
7661
TEST(DECONVOLUTION_NHWC_QU8,weights_cache_grouped_2x2s2)7662 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_grouped_2x2s2) {
7663 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7664 DeconvolutionOperatorTester()
7665 .input_size(kStridedInputHeight, kStridedInputWidth)
7666 .kernel_size(2, 2)
7667 .stride(2)
7668 .groups(2)
7669 .group_input_channels(17)
7670 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7671 .use_weights_cache(true)
7672 .iterations(3)
7673 .TestQU8();
7674 }
7675
7676 /**************************** SUBCONV2D/GEMM path, batched ****************************/
7677
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2)7678 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2) {
7679 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7680 DeconvolutionOperatorTester()
7681 .batch_size(2)
7682 .input_size(kStridedInputHeight, kStridedInputWidth)
7683 .kernel_size(2, 2)
7684 .stride(2)
7685 .group_input_channels(15)
7686 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7687 .iterations(3)
7688 .TestQU8();
7689 }
7690
TEST(DECONVOLUTION_NHWC_QU8,batched_Kx2sKx2)7691 TEST(DECONVOLUTION_NHWC_QU8, batched_Kx2sKx2) {
7692 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7693 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7694 DeconvolutionOperatorTester()
7695 .batch_size(2)
7696 .input_size(kStridedInputHeight, kStridedInputWidth)
7697 .kernel_size(kernel_height, 2)
7698 .stride(kernel_height, 2)
7699 .group_input_channels(17)
7700 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7701 .iterations(3)
7702 .TestQU8();
7703 }
7704 }
7705
TEST(DECONVOLUTION_NHWC_QU8,batched_2xKs2xK)7706 TEST(DECONVOLUTION_NHWC_QU8, batched_2xKs2xK) {
7707 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7708 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7709 DeconvolutionOperatorTester()
7710 .batch_size(2)
7711 .input_size(kStridedInputHeight, kStridedInputWidth)
7712 .kernel_size(2, kernel_width)
7713 .stride(2, kernel_width)
7714 .group_input_channels(17)
7715 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7716 .iterations(3)
7717 .TestQU8();
7718 }
7719 }
7720
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_height_adjustment)7721 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_height_adjustment) {
7722 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7723 DeconvolutionOperatorTester()
7724 .batch_size(2)
7725 .input_size(kStridedInputHeight, kStridedInputWidth)
7726 .adjustment_height(1)
7727 .kernel_size(2, 2)
7728 .stride(2)
7729 .group_input_channels(15)
7730 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7731 .iterations(1)
7732 .TestQU8();
7733 }
7734
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_width_adjustment)7735 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_width_adjustment) {
7736 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7737 DeconvolutionOperatorTester()
7738 .batch_size(2)
7739 .input_size(kStridedInputHeight, kStridedInputWidth)
7740 .adjustment_width(1)
7741 .kernel_size(2, 2)
7742 .stride(2)
7743 .group_input_channels(15)
7744 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7745 .iterations(1)
7746 .TestQU8();
7747 }
7748
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_varying_input_height)7749 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_input_height) {
7750 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7751 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7752 DeconvolutionOperatorTester()
7753 .batch_size(2)
7754 .input_size(input_height, kStridedInputWidth)
7755 .kernel_size(2, 2)
7756 .stride(2)
7757 .group_input_channels(15)
7758 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7759 .iterations(1)
7760 .TestQU8();
7761 }
7762 }
7763
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_varying_input_width)7764 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_input_width) {
7765 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7766 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7767 DeconvolutionOperatorTester()
7768 .batch_size(2)
7769 .input_size(kStridedInputHeight, kStridedInputWidth)
7770 .kernel_size(2, 2)
7771 .stride(2)
7772 .group_input_channels(15)
7773 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7774 .iterations(1)
7775 .TestQU8();
7776 }
7777 }
7778
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_varying_input_channels)7779 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_input_channels) {
7780 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7781 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
7782 DeconvolutionOperatorTester()
7783 .batch_size(2)
7784 .input_size(kStridedInputHeight, kStridedInputWidth)
7785 .kernel_size(2, 2)
7786 .stride(2)
7787 .group_input_channels(input_channels)
7788 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7789 .iterations(1)
7790 .TestQU8();
7791 }
7792 }
7793
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_varying_output_channels)7794 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_output_channels) {
7795 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7796 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7797 DeconvolutionOperatorTester()
7798 .batch_size(2)
7799 .input_size(kStridedInputHeight, kStridedInputWidth)
7800 .kernel_size(2, 2)
7801 .stride(2)
7802 .group_input_channels(23)
7803 .group_output_channels(output_channels)
7804 .iterations(1)
7805 .TestQU8();
7806 }
7807 }
7808
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_with_input_stride)7809 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_input_stride) {
7810 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7811 DeconvolutionOperatorTester()
7812 .batch_size(2)
7813 .input_size(kStridedInputHeight, kStridedInputWidth)
7814 .kernel_size(2, 2)
7815 .stride(2)
7816 .group_input_channels(23)
7817 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7818 .input_pixel_stride(28)
7819 .iterations(3)
7820 .TestQU8();
7821 }
7822
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_with_output_stride)7823 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_output_stride) {
7824 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7825 DeconvolutionOperatorTester()
7826 .batch_size(2)
7827 .input_size(kStridedInputHeight, kStridedInputWidth)
7828 .kernel_size(2, 2)
7829 .stride(2)
7830 .group_input_channels(23)
7831 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7832 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7833 .iterations(3)
7834 .TestQU8();
7835 }
7836
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_with_qmin)7837 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_qmin) {
7838 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7839 DeconvolutionOperatorTester()
7840 .batch_size(2)
7841 .input_size(kStridedInputHeight, kStridedInputWidth)
7842 .kernel_size(2, 2)
7843 .stride(2)
7844 .group_input_channels(23)
7845 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7846 .qmin(128)
7847 .iterations(3)
7848 .TestQU8();
7849 }
7850
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_with_qmax)7851 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_qmax) {
7852 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7853 DeconvolutionOperatorTester()
7854 .batch_size(2)
7855 .input_size(kStridedInputHeight, kStridedInputWidth)
7856 .kernel_size(2, 2)
7857 .stride(2)
7858 .group_input_channels(23)
7859 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7860 .qmax(128)
7861 .iterations(3)
7862 .TestQU8();
7863 }
7864
TEST(DECONVOLUTION_NHWC_QU8,batched_2x2s2_without_bias)7865 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_without_bias) {
7866 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7867 DeconvolutionOperatorTester()
7868 .has_bias(false)
7869 .batch_size(2)
7870 .input_size(kStridedInputHeight, kStridedInputWidth)
7871 .kernel_size(2, 2)
7872 .stride(2)
7873 .group_input_channels(23)
7874 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7875 .iterations(3)
7876 .TestQU8();
7877 }
7878
TEST(DECONVOLUTION_NHWC_QU8,weights_cache_batched_2x2s2)7879 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_2x2s2) {
7880 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7881 DeconvolutionOperatorTester()
7882 .batch_size(2)
7883 .input_size(kStridedInputHeight, kStridedInputWidth)
7884 .kernel_size(2, 2)
7885 .stride(2)
7886 .group_input_channels(15)
7887 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7888 .use_weights_cache(true)
7889 .iterations(3)
7890 .TestQU8();
7891 }
7892
7893 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
7894
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2)7895 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2) {
7896 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7897 DeconvolutionOperatorTester()
7898 .batch_size(2)
7899 .input_size(kStridedInputHeight, kStridedInputWidth)
7900 .kernel_size(2, 2)
7901 .stride(2)
7902 .groups(2)
7903 .group_input_channels(17)
7904 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7905 .iterations(3)
7906 .TestQU8();
7907 }
7908
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_Kx2sKx2)7909 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_Kx2sKx2) {
7910 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7911 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7912 DeconvolutionOperatorTester()
7913 .batch_size(2)
7914 .input_size(kStridedInputHeight, kStridedInputWidth)
7915 .kernel_size(kernel_height, 2)
7916 .stride(kernel_height, 2)
7917 .groups(2)
7918 .group_input_channels(17)
7919 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7920 .iterations(3)
7921 .TestQU8();
7922 }
7923 }
7924
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2xKs2xK)7925 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2xKs2xK) {
7926 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7927 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7928 DeconvolutionOperatorTester()
7929 .batch_size(2)
7930 .input_size(kStridedInputHeight, kStridedInputWidth)
7931 .kernel_size(2, kernel_width)
7932 .stride(2, kernel_width)
7933 .groups(2)
7934 .group_input_channels(17)
7935 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7936 .iterations(3)
7937 .TestQU8();
7938 }
7939 }
7940
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_height_adjustment)7941 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_height_adjustment) {
7942 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7943 DeconvolutionOperatorTester()
7944 .batch_size(2)
7945 .input_size(kStridedInputHeight, kStridedInputWidth)
7946 .adjustment_height(1)
7947 .kernel_size(2, 2)
7948 .stride(2)
7949 .groups(2)
7950 .group_input_channels(17)
7951 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7952 .iterations(1)
7953 .TestQU8();
7954 }
7955
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_width_adjustment)7956 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_width_adjustment) {
7957 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7958 DeconvolutionOperatorTester()
7959 .batch_size(2)
7960 .input_size(kStridedInputHeight, kStridedInputWidth)
7961 .adjustment_width(1)
7962 .kernel_size(2, 2)
7963 .stride(2)
7964 .groups(2)
7965 .group_input_channels(17)
7966 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7967 .iterations(1)
7968 .TestQU8();
7969 }
7970
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_varying_input_height)7971 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_input_height) {
7972 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7973 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7974 DeconvolutionOperatorTester()
7975 .batch_size(2)
7976 .input_size(input_height, kStridedInputWidth)
7977 .kernel_size(2, 2)
7978 .stride(2)
7979 .groups(2)
7980 .group_input_channels(17)
7981 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7982 .iterations(1)
7983 .TestQU8();
7984 }
7985 }
7986
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_varying_input_width)7987 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_input_width) {
7988 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7989 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7990 DeconvolutionOperatorTester()
7991 .batch_size(2)
7992 .input_size(kStridedInputHeight, kStridedInputWidth)
7993 .kernel_size(2, 2)
7994 .stride(2)
7995 .groups(2)
7996 .group_input_channels(17)
7997 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7998 .iterations(1)
7999 .TestQU8();
8000 }
8001 }
8002
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_varying_input_channels)8003 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_input_channels) {
8004 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8005 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
8006 DeconvolutionOperatorTester()
8007 .batch_size(2)
8008 .input_size(kStridedInputHeight, kStridedInputWidth)
8009 .kernel_size(2, 2)
8010 .stride(2)
8011 .groups(2)
8012 .group_input_channels(input_channels)
8013 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8014 .iterations(1)
8015 .TestQU8();
8016 }
8017 }
8018
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_varying_output_channels)8019 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_output_channels) {
8020 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8021 for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
8022 DeconvolutionOperatorTester()
8023 .batch_size(2)
8024 .input_size(kStridedInputHeight, kStridedInputWidth)
8025 .kernel_size(2, 2)
8026 .stride(2)
8027 .groups(2)
8028 .group_input_channels(17)
8029 .group_output_channels(output_channels)
8030 .iterations(1)
8031 .TestQU8();
8032 }
8033 }
8034
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_with_input_stride)8035 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_input_stride) {
8036 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8037 DeconvolutionOperatorTester()
8038 .batch_size(2)
8039 .input_size(kStridedInputHeight, kStridedInputWidth)
8040 .kernel_size(2, 2)
8041 .stride(2)
8042 .groups(2)
8043 .group_input_channels(17)
8044 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8045 .input_pixel_stride(37)
8046 .iterations(3)
8047 .TestQU8();
8048 }
8049
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_with_output_stride)8050 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_output_stride) {
8051 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8052 DeconvolutionOperatorTester()
8053 .batch_size(2)
8054 .input_size(kStridedInputHeight, kStridedInputWidth)
8055 .kernel_size(2, 2)
8056 .stride(2)
8057 .groups(2)
8058 .group_input_channels(17)
8059 .group_output_channels(xnn_params.qu8.gemm.nr + 3)
8060 .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
8061 .iterations(3)
8062 .TestQU8();
8063 }
8064
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_with_qmin)8065 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_qmin) {
8066 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8067 DeconvolutionOperatorTester()
8068 .batch_size(2)
8069 .input_size(kStridedInputHeight, kStridedInputWidth)
8070 .kernel_size(2, 2)
8071 .stride(2)
8072 .groups(2)
8073 .group_input_channels(17)
8074 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8075 .qmin(128)
8076 .iterations(3)
8077 .TestQU8();
8078 }
8079
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_with_qmax)8080 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_qmax) {
8081 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8082 DeconvolutionOperatorTester()
8083 .batch_size(2)
8084 .input_size(kStridedInputHeight, kStridedInputWidth)
8085 .kernel_size(2, 2)
8086 .stride(2)
8087 .groups(2)
8088 .group_input_channels(17)
8089 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8090 .qmax(128)
8091 .iterations(3)
8092 .TestQU8();
8093 }
8094
TEST(DECONVOLUTION_NHWC_QU8,batched_grouped_2x2s2_without_bias)8095 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_without_bias) {
8096 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8097 DeconvolutionOperatorTester()
8098 .has_bias(false)
8099 .batch_size(2)
8100 .input_size(kStridedInputHeight, kStridedInputWidth)
8101 .kernel_size(2, 2)
8102 .stride(2)
8103 .groups(2)
8104 .group_input_channels(17)
8105 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8106 .iterations(3)
8107 .TestQU8();
8108 }
8109
TEST(DECONVOLUTION_NHWC_QU8,weights_cache_batched_grouped_2x2s2)8110 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_grouped_2x2s2) {
8111 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8112 DeconvolutionOperatorTester()
8113 .batch_size(2)
8114 .input_size(kStridedInputHeight, kStridedInputWidth)
8115 .kernel_size(2, 2)
8116 .stride(2)
8117 .groups(2)
8118 .group_input_channels(17)
8119 .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8120 .use_weights_cache(true)
8121 .iterations(3)
8122 .TestQU8();
8123 }
8124
8125 /**************************** SUBCONV2D/GEMM path, setup ****************************/
8126
8127 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_setup_changing_batch) {
8128 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8129 DeconvolutionOperatorTester()
8130 .batch_size(2)
8131 .next_batch_size(5)
8132 .input_size(kStridedInputHeight, kStridedInputWidth)
8133 .kernel_size(2, 2)
8134 .stride(2)
8135 .groups(2)
8136 .group_input_channels(15)
8137 .group_output_channels(17)
8138 .TestSetupQU8();
8139 }
8140
8141 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_setup_changing_height) {
8142 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8143 DeconvolutionOperatorTester()
8144 .batch_size(2)
8145 .input_size(kStridedInputHeight, kStridedInputWidth)
8146 .next_input_height(kStridedInputHeight + 3)
8147 .kernel_size(2, 2)
8148 .stride(2)
8149 .groups(2)
8150 .group_input_channels(15)
8151 .group_output_channels(17)
8152 .TestSetupQU8();
8153 }
8154
8155 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_setup_changing_width) {
8156 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8157 DeconvolutionOperatorTester()
8158 .batch_size(2)
8159 .input_size(kStridedInputHeight, kStridedInputWidth)
8160 .next_input_width(kStridedInputWidth + 3)
8161 .kernel_size(2, 2)
8162 .stride(2)
8163 .groups(2)
8164 .group_input_channels(15)
8165 .group_output_channels(17)
8166 .TestSetupQU8();
8167 }
8168
8169 /**************************** Future GEMM path ****************************/
8170
8171 TEST(DECONVOLUTION_NHWC_F16, 1x1) {
8172 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8173 DeconvolutionOperatorTester()
8174 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8175 .kernel_size(1, 1)
8176 .group_input_channels(23)
8177 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8178 .iterations(3)
8179 .TestF16();
8180 }
8181
8182 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_fp32_weights) {
8183 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8184 DeconvolutionOperatorTester()
8185 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8186 .kernel_size(1, 1)
8187 .group_input_channels(23)
8188 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8189 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8190 .iterations(3)
8191 .TestF16();
8192 }
8193
8194 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_input_width) {
8195 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8196 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8197 DeconvolutionOperatorTester()
8198 .input_size(input_height, kUnstridedInputWidth)
8199 .kernel_size(1, 1)
8200 .group_input_channels(23)
8201 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8202 .iterations(1)
8203 .TestF16();
8204 }
8205 }
8206
8207 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_input_height) {
8208 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8209 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8210 DeconvolutionOperatorTester()
8211 .input_size(kUnstridedInputHeight, input_width)
8212 .kernel_size(1, 1)
8213 .group_input_channels(23)
8214 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8215 .iterations(1)
8216 .TestF16();
8217 }
8218 }
8219
8220 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_input_channels) {
8221 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8222 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8223 DeconvolutionOperatorTester()
8224 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8225 .kernel_size(1, 1)
8226 .group_input_channels(input_channels)
8227 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8228 .iterations(1)
8229 .TestF16();
8230 }
8231 }
8232
8233 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_output_channels) {
8234 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8235 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8236 DeconvolutionOperatorTester()
8237 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8238 .kernel_size(1, 1)
8239 .group_input_channels(23)
8240 .group_output_channels(output_channels)
8241 .iterations(1)
8242 .TestF16();
8243 }
8244 }
8245
8246 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_input_stride) {
8247 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8248 DeconvolutionOperatorTester()
8249 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8250 .kernel_size(1, 1)
8251 .group_input_channels(23)
8252 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8253 .input_pixel_stride(28)
8254 .iterations(3)
8255 .TestF16();
8256 }
8257
8258 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_output_stride) {
8259 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8260 DeconvolutionOperatorTester()
8261 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8262 .kernel_size(1, 1)
8263 .group_input_channels(23)
8264 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8265 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8266 .iterations(3)
8267 .TestF16();
8268 }
8269
8270 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_qmin) {
8271 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8272 DeconvolutionOperatorTester()
8273 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8274 .kernel_size(1, 1)
8275 .group_input_channels(23)
8276 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8277 .qmin(128)
8278 .iterations(3)
8279 .TestF16();
8280 }
8281
8282 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_qmax) {
8283 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8284 DeconvolutionOperatorTester()
8285 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8286 .kernel_size(1, 1)
8287 .group_input_channels(23)
8288 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8289 .qmax(128)
8290 .iterations(3)
8291 .TestF16();
8292 }
8293
8294 TEST(DECONVOLUTION_NHWC_F16, 1x1_without_bias) {
8295 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8296 DeconvolutionOperatorTester()
8297 .has_bias(false)
8298 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8299 .kernel_size(1, 1)
8300 .group_input_channels(23)
8301 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8302 .iterations(3)
8303 .TestF16();
8304 }
8305
8306 /**************************** Future GEMM path, grouped ****************************/
8307
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1)8308 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1) {
8309 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8310 DeconvolutionOperatorTester()
8311 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8312 .kernel_size(1, 1)
8313 .groups(2)
8314 .group_input_channels(23)
8315 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8316 .iterations(3)
8317 .TestF16();
8318 }
8319
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_with_fp32_weights)8320 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_fp32_weights) {
8321 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8322 DeconvolutionOperatorTester()
8323 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8324 .kernel_size(1, 1)
8325 .groups(2)
8326 .group_input_channels(23)
8327 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8328 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8329 .iterations(3)
8330 .TestF16();
8331 }
8332
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_varying_input_width)8333 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_input_width) {
8334 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8335 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8336 DeconvolutionOperatorTester()
8337 .input_size(input_height, kUnstridedInputWidth)
8338 .kernel_size(1, 1)
8339 .groups(2)
8340 .group_input_channels(23)
8341 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8342 .iterations(1)
8343 .TestF16();
8344 }
8345 }
8346
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_varying_input_height)8347 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_input_height) {
8348 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8349 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8350 DeconvolutionOperatorTester()
8351 .input_size(kUnstridedInputHeight, input_width)
8352 .kernel_size(1, 1)
8353 .groups(2)
8354 .group_input_channels(23)
8355 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8356 .iterations(1)
8357 .TestF16();
8358 }
8359 }
8360
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_varying_input_channels)8361 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_input_channels) {
8362 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8363 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8364 DeconvolutionOperatorTester()
8365 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8366 .kernel_size(1, 1)
8367 .groups(2)
8368 .group_input_channels(input_channels)
8369 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8370 .iterations(1)
8371 .TestF16();
8372 }
8373 }
8374
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_varying_output_channels)8375 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_output_channels) {
8376 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8377 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8378 DeconvolutionOperatorTester()
8379 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8380 .kernel_size(1, 1)
8381 .groups(2)
8382 .group_input_channels(23)
8383 .group_output_channels(output_channels)
8384 .iterations(1)
8385 .TestF16();
8386 }
8387 }
8388
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_with_input_stride)8389 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_input_stride) {
8390 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8391 DeconvolutionOperatorTester()
8392 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8393 .kernel_size(1, 1)
8394 .groups(2)
8395 .group_input_channels(23)
8396 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8397 .input_pixel_stride(47)
8398 .iterations(3)
8399 .TestF16();
8400 }
8401
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_with_output_stride)8402 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_output_stride) {
8403 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8404 DeconvolutionOperatorTester()
8405 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8406 .kernel_size(1, 1)
8407 .groups(2)
8408 .group_input_channels(23)
8409 .group_output_channels(xnn_params.f16.gemm.nr + 3)
8410 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8411 .iterations(3)
8412 .TestF16();
8413 }
8414
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_with_qmin)8415 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_qmin) {
8416 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8417 DeconvolutionOperatorTester()
8418 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8419 .kernel_size(1, 1)
8420 .groups(2)
8421 .group_input_channels(23)
8422 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8423 .qmin(128)
8424 .iterations(3)
8425 .TestF16();
8426 }
8427
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_with_qmax)8428 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_qmax) {
8429 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8430 DeconvolutionOperatorTester()
8431 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8432 .kernel_size(1, 1)
8433 .groups(2)
8434 .group_input_channels(23)
8435 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8436 .qmax(128)
8437 .iterations(3)
8438 .TestF16();
8439 }
8440
TEST(DECONVOLUTION_NHWC_F16,grouped_1x1_without_bias)8441 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_without_bias) {
8442 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8443 DeconvolutionOperatorTester()
8444 .has_bias(false)
8445 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8446 .kernel_size(1, 1)
8447 .groups(2)
8448 .group_input_channels(23)
8449 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8450 .iterations(3)
8451 .TestF16();
8452 }
8453
8454 /**************************** Future GEMM path, batched ****************************/
8455
TEST(DECONVOLUTION_NHWC_F16,batched_1x1)8456 TEST(DECONVOLUTION_NHWC_F16, batched_1x1) {
8457 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8458 DeconvolutionOperatorTester()
8459 .batch_size(2)
8460 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8461 .kernel_size(1, 1)
8462 .group_input_channels(23)
8463 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8464 .iterations(3)
8465 .TestF16();
8466 }
8467
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_with_fp32_weights)8468 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_fp32_weights) {
8469 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8470 DeconvolutionOperatorTester()
8471 .batch_size(2)
8472 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8473 .kernel_size(1, 1)
8474 .group_input_channels(23)
8475 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8476 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8477 .iterations(3)
8478 .TestF16();
8479 }
8480
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_varying_input_width)8481 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_input_width) {
8482 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8483 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8484 DeconvolutionOperatorTester()
8485 .batch_size(2)
8486 .input_size(input_height, kUnstridedInputWidth)
8487 .kernel_size(1, 1)
8488 .group_input_channels(23)
8489 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8490 .iterations(1)
8491 .TestF16();
8492 }
8493 }
8494
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_varying_input_height)8495 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_input_height) {
8496 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8497 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8498 DeconvolutionOperatorTester()
8499 .batch_size(2)
8500 .input_size(kUnstridedInputHeight, input_width)
8501 .kernel_size(1, 1)
8502 .group_input_channels(23)
8503 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8504 .iterations(1)
8505 .TestF16();
8506 }
8507 }
8508
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_varying_input_channels)8509 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_input_channels) {
8510 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8511 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8512 DeconvolutionOperatorTester()
8513 .batch_size(2)
8514 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8515 .kernel_size(1, 1)
8516 .group_input_channels(input_channels)
8517 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8518 .iterations(1)
8519 .TestF16();
8520 }
8521 }
8522
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_varying_output_channels)8523 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_output_channels) {
8524 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8525 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8526 DeconvolutionOperatorTester()
8527 .batch_size(2)
8528 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8529 .kernel_size(1, 1)
8530 .group_input_channels(23)
8531 .group_output_channels(output_channels)
8532 .iterations(1)
8533 .TestF16();
8534 }
8535 }
8536
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_with_input_stride)8537 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_input_stride) {
8538 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8539 DeconvolutionOperatorTester()
8540 .batch_size(2)
8541 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8542 .kernel_size(1, 1)
8543 .group_input_channels(23)
8544 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8545 .input_pixel_stride(28)
8546 .iterations(3)
8547 .TestF16();
8548 }
8549
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_with_output_stride)8550 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_output_stride) {
8551 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8552 DeconvolutionOperatorTester()
8553 .batch_size(2)
8554 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8555 .kernel_size(1, 1)
8556 .group_input_channels(23)
8557 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8558 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8559 .iterations(3)
8560 .TestF16();
8561 }
8562
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_with_qmin)8563 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_qmin) {
8564 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8565 DeconvolutionOperatorTester()
8566 .batch_size(2)
8567 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8568 .kernel_size(1, 1)
8569 .group_input_channels(23)
8570 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8571 .qmin(128)
8572 .iterations(3)
8573 .TestF16();
8574 }
8575
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_with_qmax)8576 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_qmax) {
8577 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8578 DeconvolutionOperatorTester()
8579 .batch_size(2)
8580 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8581 .kernel_size(1, 1)
8582 .group_input_channels(23)
8583 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8584 .qmax(128)
8585 .iterations(3)
8586 .TestF16();
8587 }
8588
TEST(DECONVOLUTION_NHWC_F16,batched_1x1_without_bias)8589 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_without_bias) {
8590 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8591 DeconvolutionOperatorTester()
8592 .has_bias(false)
8593 .batch_size(2)
8594 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8595 .kernel_size(1, 1)
8596 .group_input_channels(23)
8597 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8598 .iterations(3)
8599 .TestF16();
8600 }
8601
8602 /**************************** Future GEMM path, batched, grouped ****************************/
8603
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1)8604 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1) {
8605 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8606 DeconvolutionOperatorTester()
8607 .batch_size(2)
8608 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8609 .kernel_size(1, 1)
8610 .groups(2)
8611 .group_input_channels(23)
8612 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8613 .iterations(3)
8614 .TestF16();
8615 }
8616
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_with_fp32_weights)8617 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_fp32_weights) {
8618 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8619 DeconvolutionOperatorTester()
8620 .batch_size(2)
8621 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8622 .kernel_size(1, 1)
8623 .groups(2)
8624 .group_input_channels(23)
8625 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8626 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8627 .iterations(3)
8628 .TestF16();
8629 }
8630
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_varying_input_width)8631 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_input_width) {
8632 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8633 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8634 DeconvolutionOperatorTester()
8635 .batch_size(2)
8636 .input_size(input_height, kUnstridedInputWidth)
8637 .kernel_size(1, 1)
8638 .groups(2)
8639 .group_input_channels(23)
8640 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8641 .iterations(1)
8642 .TestF16();
8643 }
8644 }
8645
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_varying_input_height)8646 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_input_height) {
8647 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8648 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8649 DeconvolutionOperatorTester()
8650 .batch_size(2)
8651 .input_size(kUnstridedInputHeight, input_width)
8652 .kernel_size(1, 1)
8653 .groups(2)
8654 .group_input_channels(23)
8655 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8656 .iterations(1)
8657 .TestF16();
8658 }
8659 }
8660
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_varying_input_channels)8661 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_input_channels) {
8662 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8663 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8664 DeconvolutionOperatorTester()
8665 .batch_size(2)
8666 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8667 .kernel_size(1, 1)
8668 .groups(2)
8669 .group_input_channels(input_channels)
8670 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8671 .iterations(1)
8672 .TestF16();
8673 }
8674 }
8675
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_varying_output_channels)8676 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_output_channels) {
8677 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8678 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8679 DeconvolutionOperatorTester()
8680 .batch_size(2)
8681 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8682 .kernel_size(1, 1)
8683 .groups(2)
8684 .group_input_channels(23)
8685 .group_output_channels(output_channels)
8686 .iterations(1)
8687 .TestF16();
8688 }
8689 }
8690
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_with_input_stride)8691 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_input_stride) {
8692 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8693 DeconvolutionOperatorTester()
8694 .batch_size(2)
8695 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8696 .kernel_size(1, 1)
8697 .groups(2)
8698 .group_input_channels(23)
8699 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8700 .input_pixel_stride(47)
8701 .iterations(3)
8702 .TestF16();
8703 }
8704
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_with_output_stride)8705 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_output_stride) {
8706 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8707 DeconvolutionOperatorTester()
8708 .batch_size(2)
8709 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8710 .kernel_size(1, 1)
8711 .groups(2)
8712 .group_input_channels(23)
8713 .group_output_channels(xnn_params.f16.gemm.nr + 3)
8714 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8715 .iterations(3)
8716 .TestF16();
8717 }
8718
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_with_qmin)8719 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_qmin) {
8720 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8721 DeconvolutionOperatorTester()
8722 .batch_size(2)
8723 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8724 .kernel_size(1, 1)
8725 .groups(2)
8726 .group_input_channels(23)
8727 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8728 .qmin(128)
8729 .iterations(3)
8730 .TestF16();
8731 }
8732
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_with_qmax)8733 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_qmax) {
8734 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8735 DeconvolutionOperatorTester()
8736 .batch_size(2)
8737 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8738 .kernel_size(1, 1)
8739 .groups(2)
8740 .group_input_channels(23)
8741 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8742 .qmax(128)
8743 .iterations(3)
8744 .TestF16();
8745 }
8746
TEST(DECONVOLUTION_NHWC_F16,batched_grouped_1x1_without_bias)8747 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_without_bias) {
8748 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8749 DeconvolutionOperatorTester()
8750 .has_bias(false)
8751 .batch_size(2)
8752 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8753 .kernel_size(1, 1)
8754 .groups(2)
8755 .group_input_channels(23)
8756 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8757 .iterations(3)
8758 .TestF16();
8759 }
8760
8761 /**************************** CONV path ****************************/
8762
8763 TEST(DECONVOLUTION_NHWC_F16, 3x3) {
8764 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8765 DeconvolutionOperatorTester()
8766 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8767 .padding(1)
8768 .kernel_size(3, 3)
8769 .group_input_channels(15)
8770 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8771 .iterations(3)
8772 .TestF16();
8773 }
8774
8775 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_fp32_weights) {
8776 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8777 DeconvolutionOperatorTester()
8778 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8779 .padding(1)
8780 .kernel_size(3, 3)
8781 .group_input_channels(15)
8782 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8783 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8784 .iterations(3)
8785 .TestF16();
8786 }
8787
TEST(DECONVOLUTION_NHWC_F16,Kx3)8788 TEST(DECONVOLUTION_NHWC_F16, Kx3) {
8789 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8790 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
8791 DeconvolutionOperatorTester()
8792 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8793 .padding_width(1)
8794 .kernel_size(kernel_height, 3)
8795 .group_input_channels(17)
8796 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8797 .iterations(3)
8798 .TestF16();
8799 }
8800 }
8801
8802 TEST(DECONVOLUTION_NHWC_F16, 3xK) {
8803 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8804 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
8805 DeconvolutionOperatorTester()
8806 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8807 .padding_height(1)
8808 .kernel_size(3, kernel_width)
8809 .group_input_channels(17)
8810 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8811 .iterations(3)
8812 .TestF16();
8813 }
8814 }
8815
8816 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_height_padding) {
8817 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8818 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
8819 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
8820 DeconvolutionOperatorTester()
8821 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8822 .padding_width(1)
8823 .padding_top(padding_top)
8824 .padding_bottom(padding_bottom)
8825 .kernel_size(3, 3)
8826 .group_input_channels(15)
8827 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8828 .iterations(1)
8829 .TestF16();
8830 }
8831 }
8832 }
8833
8834 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_width_padding) {
8835 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8836 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
8837 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
8838 DeconvolutionOperatorTester()
8839 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8840 .padding_height(1)
8841 .padding_left(padding_left)
8842 .padding_right(padding_right)
8843 .kernel_size(3, 3)
8844 .group_input_channels(15)
8845 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8846 .iterations(1)
8847 .TestF16();
8848 }
8849 }
8850 }
8851
8852 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_height_adjustment) {
8853 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8854 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
8855 DeconvolutionOperatorTester()
8856 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8857 .padding(1)
8858 .stride_height(adjustment_height + 1)
8859 .adjustment_height(adjustment_height)
8860 .kernel_size(3, 3)
8861 .group_input_channels(15)
8862 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8863 .iterations(1)
8864 .TestF16();
8865 }
8866 }
8867
8868 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_width_adjustment) {
8869 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8870 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
8871 DeconvolutionOperatorTester()
8872 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8873 .padding(1)
8874 .stride_width(adjustment_width + 1)
8875 .adjustment_width(adjustment_width)
8876 .kernel_size(3, 3)
8877 .group_input_channels(15)
8878 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8879 .iterations(1)
8880 .TestF16();
8881 }
8882 }
8883
8884 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_input_height) {
8885 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8886 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8887 DeconvolutionOperatorTester()
8888 .input_size(input_height, kUnstridedInputWidth)
8889 .padding(1)
8890 .kernel_size(3, 3)
8891 .group_input_channels(15)
8892 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8893 .iterations(1)
8894 .TestF16();
8895 }
8896 }
8897
8898 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_input_width) {
8899 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8900 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8901 DeconvolutionOperatorTester()
8902 .input_size(kUnstridedInputHeight, input_width)
8903 .padding(1)
8904 .kernel_size(3, 3)
8905 .group_input_channels(15)
8906 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8907 .iterations(1)
8908 .TestF16();
8909 }
8910 }
8911
8912 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_input_channels) {
8913 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8914 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8915 DeconvolutionOperatorTester()
8916 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8917 .padding(1)
8918 .kernel_size(3, 3)
8919 .group_input_channels(input_channels)
8920 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8921 .iterations(1)
8922 .TestF16();
8923 }
8924 }
8925
8926 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_output_channels) {
8927 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8928 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8929 DeconvolutionOperatorTester()
8930 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8931 .padding(1)
8932 .kernel_size(3, 3)
8933 .group_input_channels(23)
8934 .group_output_channels(output_channels)
8935 .iterations(1)
8936 .TestF16();
8937 }
8938 }
8939
8940 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_height_dilation) {
8941 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8942 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
8943 DeconvolutionOperatorTester()
8944 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8945 .padding(1)
8946 .kernel_size(3, 3)
8947 .dilation_height(dilation_height)
8948 .group_input_channels(23)
8949 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8950 .iterations(3)
8951 .TestF16();
8952 }
8953 }
8954
8955 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_width_dilation) {
8956 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8957 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
8958 DeconvolutionOperatorTester()
8959 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8960 .padding(1)
8961 .kernel_size(3, 3)
8962 .dilation_width(dilation_width)
8963 .group_input_channels(23)
8964 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8965 .iterations(3)
8966 .TestF16();
8967 }
8968 }
8969
8970 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_height_dilation_and_stride) {
8971 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8972 DeconvolutionOperatorTester()
8973 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8974 .padding(1)
8975 .kernel_size(3, 3)
8976 .dilation_height(3)
8977 .stride_height(2)
8978 .group_input_channels(23)
8979 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8980 .iterations(3)
8981 .TestF16();
8982 }
8983
8984 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_width_dilation_and_stride) {
8985 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8986 DeconvolutionOperatorTester()
8987 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8988 .padding(1)
8989 .kernel_size(3, 3)
8990 .dilation_width(3)
8991 .stride_width(2)
8992 .group_input_channels(23)
8993 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8994 .iterations(3)
8995 .TestF16();
8996 }
8997
8998 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_input_stride) {
8999 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9000 DeconvolutionOperatorTester()
9001 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9002 .padding(1)
9003 .kernel_size(3, 3)
9004 .group_input_channels(23)
9005 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9006 .input_pixel_stride(28)
9007 .iterations(3)
9008 .TestF16();
9009 }
9010
9011 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_output_stride) {
9012 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9013 DeconvolutionOperatorTester()
9014 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9015 .padding(1)
9016 .kernel_size(3, 3)
9017 .group_input_channels(23)
9018 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9019 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
9020 .iterations(3)
9021 .TestF16();
9022 }
9023
9024 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_qmin) {
9025 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9026 DeconvolutionOperatorTester()
9027 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9028 .padding(1)
9029 .kernel_size(3, 3)
9030 .group_input_channels(23)
9031 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9032 .qmin(128)
9033 .iterations(3)
9034 .TestF16();
9035 }
9036
9037 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_qmax) {
9038 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9039 DeconvolutionOperatorTester()
9040 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9041 .padding(1)
9042 .kernel_size(3, 3)
9043 .group_input_channels(23)
9044 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9045 .qmax(128)
9046 .iterations(3)
9047 .TestF16();
9048 }
9049
9050 TEST(DECONVOLUTION_NHWC_F16, 3x3_without_bias) {
9051 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9052 DeconvolutionOperatorTester()
9053 .has_bias(false)
9054 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9055 .padding(1)
9056 .kernel_size(3, 3)
9057 .group_input_channels(23)
9058 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9059 .iterations(3)
9060 .TestF16();
9061 }
9062
TEST(DECONVOLUTION_NHWC_F16,weights_cache_3x3)9063 TEST(DECONVOLUTION_NHWC_F16, weights_cache_3x3) {
9064 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9065 DeconvolutionOperatorTester()
9066 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9067 .padding(1)
9068 .kernel_size(3, 3)
9069 .group_input_channels(15)
9070 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9071 .use_weights_cache(true)
9072 .iterations(3)
9073 .TestF16();
9074 }
9075
9076 /**************************** CONV path, grouped ****************************/
9077
TEST(DECONVOLUTION_NHWC_F16,grouped_3x3)9078 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3) {
9079 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9080 DeconvolutionOperatorTester()
9081 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9082 .padding(1)
9083 .kernel_size(3, 3)
9084 .groups(2)
9085 .group_input_channels(15)
9086 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9087 .iterations(3)
9088 .TestF16();
9089 }
9090
TEST(DECONVOLUTION_NHWC_F16,grouped_3x3_with_fp32_weights)9091 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_fp32_weights) {
9092 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9093 DeconvolutionOperatorTester()
9094 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9095 .padding(1)
9096 .kernel_size(3, 3)
9097 .groups(2)
9098 .group_input_channels(15)
9099 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9100 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
9101 .iterations(3)
9102 .TestF16();
9103 }
9104
TEST(DECONVOLUTION_NHWC_F16,grouped_Kx3)9105 TEST(DECONVOLUTION_NHWC_F16, grouped_Kx3) {
9106 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9107 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
9108 DeconvolutionOperatorTester()
9109 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9110 .padding_width(1)
9111 .kernel_size(kernel_height, 3)
9112 .groups(2)
9113 .group_input_channels(17)
9114 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9115 .iterations(3)
9116 .TestF16();
9117 }
9118 }
9119
TEST(DECONVOLUTION_NHWC_F16,grouped_3xK)9120 TEST(DECONVOLUTION_NHWC_F16, grouped_3xK) {
9121 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9122 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
9123 DeconvolutionOperatorTester()
9124 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9125 .padding_height(1)
9126 .kernel_size(3, kernel_width)
9127 .groups(2)
9128 .group_input_channels(17)
9129 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9130 .iterations(3)
9131 .TestF16();
9132 }
9133 }
9134
TEST(DECONVOLUTION_NHWC_F16,grouped_3x3_varying_height_padding)9135 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_height_padding) {
9136 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9137 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
9138 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
9139 DeconvolutionOperatorTester()
9140 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9141 .padding_width(1)
9142 .padding_top(padding_top)
9143 .padding_bottom(padding_bottom)
9144 .kernel_size(3, 3)
9145 .groups(2)
9146 .group_input_channels(15)
9147 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9148 .iterations(1)
9149 .TestF16();
9150 }
9151 }
9152 }
9153
9154 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_width_padding) {
9155 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9156 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
9157 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
9158 DeconvolutionOperatorTester()
9159 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9160 .padding_height(1)
9161 .padding_left(padding_left)
9162 .padding_right(padding_right)
9163 .kernel_size(3, 3)
9164 .groups(2)
9165 .group_input_channels(15)
9166 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9167 .iterations(1)
9168 .TestF16();
9169 }
9170 }
9171 }
9172
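// Note: "adjustment" is the extra output padding of a transposed convolution. Assuming the usual
// output-size formula, output = (input - 1) * stride + dilation * (kernel - 1) + 1 - padding + adjustment,
// the adjustment tests below pick stride = adjustment + 1 so that the adjustment stays strictly below the stride.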
9173 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_height_adjustment) {
9174 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9175 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
9176 DeconvolutionOperatorTester()
9177 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9178 .padding(1)
9179 .stride_height(adjustment_height + 1)
9180 .adjustment_height(adjustment_height)
9181 .kernel_size(3, 3)
9182 .groups(2)
9183 .group_input_channels(15)
9184 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9185 .iterations(1)
9186 .TestF16();
9187 }
9188 }
9189
9190 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_width_adjustment) {
9191 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9192 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
9193 DeconvolutionOperatorTester()
9194 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9195 .padding(1)
9196 .stride_width(adjustment_width + 1)
9197 .adjustment_width(adjustment_width)
9198 .kernel_size(3, 3)
9199 .groups(2)
9200 .group_input_channels(15)
9201 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9202 .iterations(1)
9203 .TestF16();
9204 }
9205 }
9206
9207 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_input_height) {
9208 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9209 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
9210 DeconvolutionOperatorTester()
9211 .input_size(input_height, kUnstridedInputWidth)
9212 .padding(1)
9213 .kernel_size(3, 3)
9214 .groups(2)
9215 .group_input_channels(15)
9216 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9217 .iterations(1)
9218 .TestF16();
9219 }
9220 }
9221
9222 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_input_width) {
9223 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9224 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
9225 DeconvolutionOperatorTester()
9226 .input_size(kUnstridedInputHeight, input_width)
9227 .padding(1)
9228 .kernel_size(3, 3)
9229 .groups(2)
9230 .group_input_channels(15)
9231 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9232 .iterations(1)
9233 .TestF16();
9234 }
9235 }
9236
9237 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_input_channels) {
9238 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9239 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
9240 DeconvolutionOperatorTester()
9241 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9242 .padding(1)
9243 .kernel_size(3, 3)
9244 .groups(2)
9245 .group_input_channels(input_channels)
9246 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9247 .iterations(1)
9248 .TestF16();
9249 }
9250 }
9251
9252 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_output_channels) {
9253 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9254 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
9255 DeconvolutionOperatorTester()
9256 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9257 .padding(1)
9258 .kernel_size(3, 3)
9259 .groups(2)
9260 .group_input_channels(23)
9261 .group_output_channels(output_channels)
9262 .iterations(1)
9263 .TestF16();
9264 }
9265 }
9266
9267 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_height_dilation) {
9268 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9269 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
9270 DeconvolutionOperatorTester()
9271 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9272 .padding(1)
9273 .kernel_size(3, 3)
9274 .dilation_height(dilation_height)
9275 .groups(2)
9276 .group_input_channels(23)
9277 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9278 .iterations(3)
9279 .TestF16();
9280 }
9281 }
9282
9283 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_width_dilation) {
9284 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9285 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
9286 DeconvolutionOperatorTester()
9287 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9288 .padding(1)
9289 .kernel_size(3, 3)
9290 .dilation_width(dilation_width)
9291 .groups(2)
9292 .group_input_channels(23)
9293 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9294 .iterations(3)
9295 .TestF16();
9296 }
9297 }
9298
9299 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_height_dilation_and_stride) {
9300 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9301 DeconvolutionOperatorTester()
9302 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9303 .padding(1)
9304 .kernel_size(3, 3)
9305 .dilation_height(3)
9306 .stride_height(2)
9307 .groups(2)
9308 .group_input_channels(23)
9309 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9310 .iterations(3)
9311 .TestF16();
9312 }
9313
9314 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_width_dilation_and_stride) {
9315 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9316 DeconvolutionOperatorTester()
9317 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9318 .padding(1)
9319 .kernel_size(3, 3)
9320 .dilation_width(3)
9321 .stride_width(2)
9322 .groups(2)
9323 .group_input_channels(23)
9324 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9325 .iterations(3)
9326 .TestF16();
9327 }
9328
9329 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_input_stride) {
9330 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9331 DeconvolutionOperatorTester()
9332 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9333 .padding(1)
9334 .kernel_size(3, 3)
9335 .groups(2)
9336 .group_input_channels(23)
9337 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9338 .input_pixel_stride(47)
9339 .iterations(3)
9340 .TestF16();
9341 }
9342
9343 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_output_stride) {
9344 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9345 DeconvolutionOperatorTester()
9346 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9347 .padding(1)
9348 .kernel_size(3, 3)
9349 .groups(2)
9350 .group_input_channels(23)
9351 .group_output_channels(xnn_params.f16.gemm.nr + 3)
9352 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
9353 .iterations(3)
9354 .TestF16();
9355 }
9356
9357 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_qmin) {
9358 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9359 DeconvolutionOperatorTester()
9360 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9361 .padding(1)
9362 .kernel_size(3, 3)
9363 .groups(2)
9364 .group_input_channels(23)
9365 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9366 .qmin(128)
9367 .iterations(3)
9368 .TestF16();
9369 }
9370
9371 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_qmax) {
9372 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9373 DeconvolutionOperatorTester()
9374 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9375 .padding(1)
9376 .kernel_size(3, 3)
9377 .groups(2)
9378 .group_input_channels(23)
9379 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9380 .qmax(128)
9381 .iterations(3)
9382 .TestF16();
9383 }
9384
9385 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_without_bias) {
9386 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9387 DeconvolutionOperatorTester()
9388 .has_bias(false)
9389 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9390 .padding(1)
9391 .kernel_size(3, 3)
9392 .groups(2)
9393 .group_input_channels(23)
9394 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9395 .iterations(3)
9396 .TestF16();
9397 }
9398
9399 TEST(DECONVOLUTION_NHWC_F16, weights_cache_grouped_3x3) {
9400 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9401 DeconvolutionOperatorTester()
9402 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9403 .padding(1)
9404 .kernel_size(3, 3)
9405 .groups(2)
9406 .group_input_channels(15)
9407 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9408 .use_weights_cache(true)
9409 .iterations(3)
9410 .TestF16();
9411 }
9412
9413 /**************************** CONV path, batched ****************************/
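// Batched variants of the CONV path: the same coverage as the single-image tests above, but with
// batch_size(2) so the operator runs over more than one image per invocation.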
9414
9415 TEST(DECONVOLUTION_NHWC_F16, batched_3x3) {
9416 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9417 DeconvolutionOperatorTester()
9418 .batch_size(2)
9419 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9420 .padding(1)
9421 .kernel_size(3, 3)
9422 .group_input_channels(15)
9423 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9424 .iterations(3)
9425 .TestF16();
9426 }
9427
9428 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_fp32_weights) {
9429 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9430 DeconvolutionOperatorTester()
9431 .batch_size(2)
9432 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9433 .padding(1)
9434 .kernel_size(3, 3)
9435 .group_input_channels(15)
9436 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9437 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
9438 .iterations(3)
9439 .TestF16();
9440 }
9441
9442 TEST(DECONVOLUTION_NHWC_F16, batched_Kx3) {
9443 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9444 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
9445 DeconvolutionOperatorTester()
9446 .batch_size(2)
9447 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9448 .padding_width(1)
9449 .kernel_size(kernel_height, 3)
9450 .group_input_channels(17)
9451 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9452 .iterations(3)
9453 .TestF16();
9454 }
9455 }
9456
9457 TEST(DECONVOLUTION_NHWC_F16, batched_3xK) {
9458 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9459 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
9460 DeconvolutionOperatorTester()
9461 .batch_size(2)
9462 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9463 .padding_height(1)
9464 .kernel_size(3, kernel_width)
9465 .group_input_channels(17)
9466 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9467 .iterations(3)
9468 .TestF16();
9469 }
9470 }
9471
9472 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_height_padding) {
9473 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9474 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
9475 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
9476 DeconvolutionOperatorTester()
9477 .batch_size(2)
9478 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9479 .padding_width(1)
9480 .padding_top(padding_top)
9481 .padding_bottom(padding_bottom)
9482 .kernel_size(3, 3)
9483 .group_input_channels(15)
9484 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9485 .iterations(1)
9486 .TestF16();
9487 }
9488 }
9489 }
9490
9491 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_width_padding) {
9492 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9493 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
9494 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
9495 DeconvolutionOperatorTester()
9496 .batch_size(2)
9497 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9498 .padding_height(1)
9499 .padding_left(padding_left)
9500 .padding_right(padding_right)
9501 .kernel_size(3, 3)
9502 .group_input_channels(15)
9503 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9504 .iterations(1)
9505 .TestF16();
9506 }
9507 }
9508 }
9509
9510 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_height_adjustment) {
9511 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9512 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
9513 DeconvolutionOperatorTester()
9514 .batch_size(2)
9515 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9516 .padding(1)
9517 .stride_height(adjustment_height + 1)
9518 .adjustment_height(adjustment_height)
9519 .kernel_size(3, 3)
9520 .group_input_channels(15)
9521 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9522 .iterations(1)
9523 .TestF16();
9524 }
9525 }
9526
9527 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_width_adjustment) {
9528 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9529 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
9530 DeconvolutionOperatorTester()
9531 .batch_size(2)
9532 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9533 .padding(1)
9534 .stride_width(adjustment_width + 1)
9535 .adjustment_width(adjustment_width)
9536 .kernel_size(3, 3)
9537 .group_input_channels(15)
9538 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9539 .iterations(1)
9540 .TestF16();
9541 }
9542 }
9543
9544 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_input_height) {
9545 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9546 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
9547 DeconvolutionOperatorTester()
9548 .batch_size(2)
9549 .input_size(input_height, kUnstridedInputWidth)
9550 .padding(1)
9551 .kernel_size(3, 3)
9552 .group_input_channels(15)
9553 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9554 .iterations(1)
9555 .TestF16();
9556 }
9557 }
9558
9559 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_input_width) {
9560 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9561 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
9562 DeconvolutionOperatorTester()
9563 .batch_size(2)
9564 .input_size(kUnstridedInputHeight, input_width)
9565 .padding(1)
9566 .kernel_size(3, 3)
9567 .group_input_channels(15)
9568 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9569 .iterations(1)
9570 .TestF16();
9571 }
9572 }
9573
9574 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_input_channels) {
9575 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9576 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
9577 DeconvolutionOperatorTester()
9578 .batch_size(2)
9579 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9580 .padding(1)
9581 .kernel_size(3, 3)
9582 .group_input_channels(input_channels)
9583 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9584 .iterations(1)
9585 .TestF16();
9586 }
9587 }
9588
9589 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_output_channels) {
9590 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9591 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
9592 DeconvolutionOperatorTester()
9593 .batch_size(2)
9594 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9595 .padding(1)
9596 .kernel_size(3, 3)
9597 .group_input_channels(23)
9598 .group_output_channels(output_channels)
9599 .iterations(1)
9600 .TestF16();
9601 }
9602 }
9603
9604 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_height_dilation) {
9605 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9606 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
9607 DeconvolutionOperatorTester()
9608 .batch_size(2)
9609 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9610 .padding(1)
9611 .kernel_size(3, 3)
9612 .dilation_height(dilation_height)
9613 .group_input_channels(23)
9614 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9615 .iterations(3)
9616 .TestF16();
9617 }
9618 }
9619
9620 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_width_dilation) {
9621 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9622 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
9623 DeconvolutionOperatorTester()
9624 .batch_size(2)
9625 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9626 .padding(1)
9627 .kernel_size(3, 3)
9628 .dilation_width(dilation_width)
9629 .group_input_channels(23)
9630 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9631 .iterations(3)
9632 .TestF16();
9633 }
9634 }
9635
9636 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_height_dilation_and_stride) {
9637 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9638 DeconvolutionOperatorTester()
9639 .batch_size(2)
9640 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9641 .padding(1)
9642 .kernel_size(3, 3)
9643 .dilation_height(3)
9644 .stride_height(2)
9645 .group_input_channels(23)
9646 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9647 .iterations(3)
9648 .TestF16();
9649 }
9650
9651 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_width_dilation_and_stride) {
9652 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9653 DeconvolutionOperatorTester()
9654 .batch_size(2)
9655 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9656 .padding(1)
9657 .kernel_size(3, 3)
9658 .dilation_width(3)
9659 .stride_width(2)
9660 .group_input_channels(23)
9661 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9662 .iterations(3)
9663 .TestF16();
9664 }
9665
9666 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_input_stride) {
9667 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9668 DeconvolutionOperatorTester()
9669 .batch_size(2)
9670 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9671 .padding(1)
9672 .kernel_size(3, 3)
9673 .group_input_channels(23)
9674 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9675 .input_pixel_stride(28)
9676 .iterations(3)
9677 .TestF16();
9678 }
9679
9680 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_output_stride) {
9681 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9682 DeconvolutionOperatorTester()
9683 .batch_size(2)
9684 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9685 .padding(1)
9686 .kernel_size(3, 3)
9687 .group_input_channels(23)
9688 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9689 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
9690 .iterations(3)
9691 .TestF16();
9692 }
9693
9694 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_qmin) {
9695 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9696 DeconvolutionOperatorTester()
9697 .batch_size(2)
9698 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9699 .padding(1)
9700 .kernel_size(3, 3)
9701 .group_input_channels(23)
9702 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9703 .qmin(128)
9704 .iterations(3)
9705 .TestF16();
9706 }
9707
9708 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_qmax) {
9709 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9710 DeconvolutionOperatorTester()
9711 .batch_size(2)
9712 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9713 .padding(1)
9714 .kernel_size(3, 3)
9715 .group_input_channels(23)
9716 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9717 .qmax(128)
9718 .iterations(3)
9719 .TestF16();
9720 }
9721
9722 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_without_bias) {
9723 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9724 DeconvolutionOperatorTester()
9725 .has_bias(false)
9726 .batch_size(2)
9727 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9728 .padding(1)
9729 .kernel_size(3, 3)
9730 .group_input_channels(23)
9731 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9732 .iterations(3)
9733 .TestF16();
9734 }
9735
9736 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_3x3) {
9737 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9738 DeconvolutionOperatorTester()
9739 .batch_size(2)
9740 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9741 .padding(1)
9742 .kernel_size(3, 3)
9743 .group_input_channels(15)
9744 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9745 .use_weights_cache(true)
9746 .iterations(3)
9747 .TestF16();
9748 }
9749
9750 /**************************** CONV path, grouped, batched ****************************/
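// Combined case: batch_size(2) and groups(2) together on the unit-stride CONV path.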
9751
9752 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3) {
9753 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9754 DeconvolutionOperatorTester()
9755 .batch_size(2)
9756 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9757 .padding(1)
9758 .kernel_size(3, 3)
9759 .groups(2)
9760 .group_input_channels(15)
9761 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9762 .iterations(3)
9763 .TestF16();
9764 }
9765
9766 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_fp32_weights) {
9767 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9768 DeconvolutionOperatorTester()
9769 .batch_size(2)
9770 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9771 .padding(1)
9772 .kernel_size(3, 3)
9773 .groups(2)
9774 .group_input_channels(15)
9775 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9776 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
9777 .iterations(3)
9778 .TestF16();
9779 }
9780
9781 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_Kx3) {
9782 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9783 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
9784 DeconvolutionOperatorTester()
9785 .batch_size(2)
9786 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9787 .padding_width(1)
9788 .kernel_size(kernel_height, 3)
9789 .groups(2)
9790 .group_input_channels(17)
9791 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9792 .iterations(3)
9793 .TestF16();
9794 }
9795 }
9796
9797 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3xK) {
9798 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9799 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
9800 DeconvolutionOperatorTester()
9801 .batch_size(2)
9802 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9803 .padding_height(1)
9804 .kernel_size(3, kernel_width)
9805 .groups(2)
9806 .group_input_channels(17)
9807 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9808 .iterations(3)
9809 .TestF16();
9810 }
9811 }
9812
9813 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_height_padding) {
9814 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9815 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
9816 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
9817 DeconvolutionOperatorTester()
9818 .batch_size(2)
9819 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9820 .padding_width(1)
9821 .padding_top(padding_top)
9822 .padding_bottom(padding_bottom)
9823 .kernel_size(3, 3)
9824 .groups(2)
9825 .group_input_channels(15)
9826 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9827 .iterations(1)
9828 .TestF16();
9829 }
9830 }
9831 }
9832
9833 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_width_padding) {
9834 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9835 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
9836 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
9837 DeconvolutionOperatorTester()
9838 .batch_size(2)
9839 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9840 .padding_height(1)
9841 .padding_left(padding_left)
9842 .padding_right(padding_right)
9843 .kernel_size(3, 3)
9844 .groups(2)
9845 .group_input_channels(15)
9846 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9847 .iterations(1)
9848 .TestF16();
9849 }
9850 }
9851 }
9852
9853 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_height_adjustment) {
9854 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9855 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
9856 DeconvolutionOperatorTester()
9857 .batch_size(2)
9858 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9859 .padding(1)
9860 .stride_height(adjustment_height + 1)
9861 .adjustment_height(adjustment_height)
9862 .kernel_size(3, 3)
9863 .groups(2)
9864 .group_input_channels(15)
9865 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9866 .iterations(1)
9867 .TestF16();
9868 }
9869 }
9870
9871 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_width_adjustment) {
9872 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9873 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
9874 DeconvolutionOperatorTester()
9875 .batch_size(2)
9876 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9877 .padding(1)
9878 .stride_width(adjustment_width + 1)
9879 .adjustment_width(adjustment_width)
9880 .kernel_size(3, 3)
9881 .groups(2)
9882 .group_input_channels(15)
9883 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9884 .iterations(1)
9885 .TestF16();
9886 }
9887 }
9888
9889 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_input_height) {
9890 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9891 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
9892 DeconvolutionOperatorTester()
9893 .batch_size(2)
9894 .input_size(input_height, kUnstridedInputWidth)
9895 .padding(1)
9896 .kernel_size(3, 3)
9897 .groups(2)
9898 .group_input_channels(15)
9899 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9900 .iterations(1)
9901 .TestF16();
9902 }
9903 }
9904
9905 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_input_width) {
9906 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9907 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
9908 DeconvolutionOperatorTester()
9909 .batch_size(2)
9910 .input_size(kUnstridedInputHeight, input_width)
9911 .padding(1)
9912 .kernel_size(3, 3)
9913 .groups(2)
9914 .group_input_channels(15)
9915 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9916 .iterations(1)
9917 .TestF16();
9918 }
9919 }
9920
9921 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_input_channels) {
9922 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9923 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
9924 DeconvolutionOperatorTester()
9925 .batch_size(2)
9926 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9927 .padding(1)
9928 .kernel_size(3, 3)
9929 .groups(2)
9930 .group_input_channels(input_channels)
9931 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9932 .iterations(1)
9933 .TestF16();
9934 }
9935 }
9936
9937 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_output_channels) {
9938 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9939 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
9940 DeconvolutionOperatorTester()
9941 .batch_size(2)
9942 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9943 .padding(1)
9944 .kernel_size(3, 3)
9945 .groups(2)
9946 .group_input_channels(23)
9947 .group_output_channels(output_channels)
9948 .iterations(1)
9949 .TestF16();
9950 }
9951 }
9952
9953 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_height_dilation) {
9954 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9955 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
9956 DeconvolutionOperatorTester()
9957 .batch_size(2)
9958 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9959 .padding(1)
9960 .kernel_size(3, 3)
9961 .dilation_height(dilation_height)
9962 .groups(2)
9963 .group_input_channels(23)
9964 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9965 .iterations(3)
9966 .TestF16();
9967 }
9968 }
9969
9970 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_width_dilation) {
9971 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9972 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
9973 DeconvolutionOperatorTester()
9974 .batch_size(2)
9975 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9976 .padding(1)
9977 .kernel_size(3, 3)
9978 .dilation_width(dilation_width)
9979 .groups(2)
9980 .group_input_channels(23)
9981 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9982 .iterations(3)
9983 .TestF16();
9984 }
9985 }
9986
9987 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_height_dilation_and_stride) {
9988 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9989 DeconvolutionOperatorTester()
9990 .batch_size(2)
9991 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9992 .padding(1)
9993 .kernel_size(3, 3)
9994 .dilation_height(3)
9995 .stride_height(2)
9996 .groups(2)
9997 .group_input_channels(23)
9998 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9999 .iterations(3)
10000 .TestF16();
10001 }
10002
10003 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_width_dilation_and_stride) {
10004 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10005 DeconvolutionOperatorTester()
10006 .batch_size(2)
10007 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10008 .padding(1)
10009 .kernel_size(3, 3)
10010 .dilation_width(3)
10011 .stride_width(2)
10012 .groups(2)
10013 .group_input_channels(23)
10014 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10015 .iterations(3)
10016 .TestF16();
10017 }
10018
10019 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_input_stride) {
10020 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10021 DeconvolutionOperatorTester()
10022 .batch_size(2)
10023 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10024 .padding(1)
10025 .kernel_size(3, 3)
10026 .groups(2)
10027 .group_input_channels(23)
10028 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10029 .input_pixel_stride(47)
10030 .iterations(3)
10031 .TestF16();
10032 }
10033
10034 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_output_stride) {
10035 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10036 DeconvolutionOperatorTester()
10037 .batch_size(2)
10038 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10039 .padding(1)
10040 .kernel_size(3, 3)
10041 .groups(2)
10042 .group_input_channels(23)
10043 .group_output_channels(xnn_params.f16.gemm.nr + 3)
10044 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
10045 .iterations(3)
10046 .TestF16();
10047 }
10048
10049 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_qmin) {
10050 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10051 DeconvolutionOperatorTester()
10052 .batch_size(2)
10053 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10054 .padding(1)
10055 .kernel_size(3, 3)
10056 .groups(2)
10057 .group_input_channels(23)
10058 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10059 .qmin(128)
10060 .iterations(3)
10061 .TestF16();
10062 }
10063
10064 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_qmax) {
10065 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10066 DeconvolutionOperatorTester()
10067 .batch_size(2)
10068 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10069 .padding(1)
10070 .kernel_size(3, 3)
10071 .groups(2)
10072 .group_input_channels(23)
10073 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10074 .qmax(128)
10075 .iterations(3)
10076 .TestF16();
10077 }
10078
10079 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_without_bias) {
10080 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10081 DeconvolutionOperatorTester()
10082 .has_bias(false)
10083 .batch_size(2)
10084 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10085 .padding(1)
10086 .kernel_size(3, 3)
10087 .groups(2)
10088 .group_input_channels(23)
10089 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10090 .iterations(3)
10091 .TestF16();
10092 }
10093
10094 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_grouped_3x3) {
10095 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10096 DeconvolutionOperatorTester()
10097 .batch_size(2)
10098 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10099 .padding(1)
10100 .kernel_size(3, 3)
10101 .groups(2)
10102 .group_input_channels(15)
10103 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10104 .use_weights_cache(true)
10105 .iterations(3)
10106 .TestF16();
10107 }
10108
10109 /**************************** CONV path, setup ****************************/
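// Setup tests re-run an already-created operator with changed dimensions. Judging from the
// next_batch_size()/next_input_height()/next_input_width() parameters below, TestSetupF16() is
// expected to execute the operator once, set it up again with the next_* sizes, and re-check results.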
10110
10111 TEST(DECONVOLUTION_NHWC_F16, 3x3_setup_changing_batch) {
10112 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10113 DeconvolutionOperatorTester()
10114 .batch_size(2)
10115 .next_batch_size(5)
10116 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10117 .kernel_height(3)
10118 .kernel_width(5)
10119 .groups(2)
10120 .group_input_channels(15)
10121 .group_output_channels(17)
10122 .TestSetupF16();
10123 }
10124
10125 TEST(DECONVOLUTION_NHWC_F16, 3x3_setup_changing_height) {
10126 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10127 DeconvolutionOperatorTester()
10128 .batch_size(2)
10129 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10130 .next_input_height(kUnstridedInputHeight + 3)
10131 .kernel_height(3)
10132 .kernel_width(5)
10133 .groups(2)
10134 .group_input_channels(15)
10135 .group_output_channels(17)
10136 .TestSetupF16();
10137 }
10138
10139 TEST(DECONVOLUTION_NHWC_F16, 3x3_setup_changing_width) {
10140 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10141 DeconvolutionOperatorTester()
10142 .batch_size(2)
10143 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10144 .next_input_width(kUnstridedInputWidth + 3)
10145 .kernel_height(3)
10146 .kernel_width(5)
10147 .groups(2)
10148 .group_input_channels(15)
10149 .group_output_channels(17)
10150 .TestSetupF16();
10151 }
10152
10153 /**************************** SUBCONV2D/IGEMM path ****************************/
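// The tests below use stride(2) with the smaller kStridedInput* sizes and should therefore hit the
// SUBCONV2D/IGEMM path; the name suggests the strided deconvolution is decomposed into per-offset
// sub-convolutions, but the tests only validate numerical results, not which path was taken.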
10154
10155 TEST(DECONVOLUTION_NHWC_F16, 3x3s2) {
10156 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10157 DeconvolutionOperatorTester()
10158 .input_size(kStridedInputHeight, kStridedInputWidth)
10159 .padding(1)
10160 .kernel_size(3, 3)
10161 .stride(2)
10162 .group_input_channels(15)
10163 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10164 .iterations(3)
10165 .TestF16();
10166 }
10167
10168 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_fp32_weights) {
10169 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10170 DeconvolutionOperatorTester()
10171 .input_size(kStridedInputHeight, kStridedInputWidth)
10172 .padding(1)
10173 .kernel_size(3, 3)
10174 .stride(2)
10175 .group_input_channels(15)
10176 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10177 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
10178 .iterations(3)
10179 .TestF16();
10180 }
10181
10182 TEST(DECONVOLUTION_NHWC_F16, Kx3s2) {
10183 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10184 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
10185 DeconvolutionOperatorTester()
10186 .input_size(kStridedInputHeight, kStridedInputWidth)
10187 .padding_width(1)
10188 .kernel_size(kernel_height, 3)
10189 .stride(2)
10190 .group_input_channels(17)
10191 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10192 .iterations(3)
10193 .TestF16();
10194 }
10195 }
10196
10197 TEST(DECONVOLUTION_NHWC_F16, 3xKs2) {
10198 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10199 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
10200 DeconvolutionOperatorTester()
10201 .input_size(kStridedInputHeight, kStridedInputWidth)
10202 .padding_height(1)
10203 .kernel_size(3, kernel_width)
10204 .stride(2)
10205 .group_input_channels(17)
10206 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10207 .iterations(3)
10208 .TestF16();
10209 }
10210 }
10211
10212 TEST(DECONVOLUTION_NHWC_F16, 3x3sSx1) {
10213 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10214 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
10215 DeconvolutionOperatorTester()
10216 .input_size(kStridedInputHeight, kStridedInputWidth)
10217 .padding(1)
10218 .padding_width(1)
10219 .kernel_size(3, 3)
10220 .stride_height(stride_height)
10221 .group_input_channels(17)
10222 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10223 .iterations(3)
10224 .TestF16();
10225 }
10226 }
10227
10228 TEST(DECONVOLUTION_NHWC_F16, 3x3s1xS) {
10229 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10230 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
10231 DeconvolutionOperatorTester()
10232 .input_size(kStridedInputHeight, kStridedInputWidth)
10233 .padding(1)
10234 .padding_width(1)
10235 .kernel_size(3, 3)
10236 .stride_width(stride_width)
10237 .group_input_channels(17)
10238 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10239 .iterations(3)
10240 .TestF16();
10241 }
10242 }
10243
10244 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_height_padding) {
10245 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10246 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
10247 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
10248 DeconvolutionOperatorTester()
10249 .input_size(kStridedInputHeight, kStridedInputWidth)
10250 .padding_width(1)
10251 .padding_top(padding_top)
10252 .padding_bottom(padding_bottom)
10253 .kernel_size(3, 3)
10254 .stride(2)
10255 .group_input_channels(15)
10256 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10257 .iterations(1)
10258 .TestF16();
10259 }
10260 }
10261 }
10262
10263 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_width_padding) {
10264 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10265 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
10266 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
10267 DeconvolutionOperatorTester()
10268 .input_size(kStridedInputHeight, kStridedInputWidth)
10269 .padding_height(1)
10270 .padding_left(padding_left)
10271 .padding_right(padding_right)
10272 .kernel_size(3, 3)
10273 .stride(2)
10274 .group_input_channels(15)
10275 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10276 .iterations(1)
10277 .TestF16();
10278 }
10279 }
10280 }
10281
10282 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_height_adjustment) {
10283 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10284 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
10285 DeconvolutionOperatorTester()
10286 .input_size(kStridedInputHeight, kStridedInputWidth)
10287 .padding(1)
10288 .adjustment_height(adjustment_height)
10289 .kernel_size(3, 3)
10290 .stride(2)
10291 .group_input_channels(15)
10292 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10293 .iterations(1)
10294 .TestF16();
10295 }
10296 }
10297
10298 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_width_adjustment) {
10299 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10300 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
10301 DeconvolutionOperatorTester()
10302 .input_size(kStridedInputHeight, kStridedInputWidth)
10303 .padding(1)
10304 .adjustment_width(adjustment_width)
10305 .kernel_size(3, 3)
10306 .stride(2)
10307 .group_input_channels(15)
10308 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10309 .iterations(1)
10310 .TestF16();
10311 }
10312 }
10313
10314 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_input_height) {
10315 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10316 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
10317 DeconvolutionOperatorTester()
10318 .input_size(input_height, kStridedInputWidth)
10319 .padding(1)
10320 .kernel_size(3, 3)
10321 .stride(2)
10322 .group_input_channels(15)
10323 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10324 .iterations(1)
10325 .TestF16();
10326 }
10327 }
10328
10329 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_input_width) {
10330 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10331 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
10332 DeconvolutionOperatorTester()
10333 .input_size(kStridedInputHeight, input_width)
10334 .padding(1)
10335 .kernel_size(3, 3)
10336 .stride(2)
10337 .group_input_channels(15)
10338 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10339 .iterations(1)
10340 .TestF16();
10341 }
10342 }
10343
10344 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_input_channels) {
10345 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10346 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
10347 DeconvolutionOperatorTester()
10348 .input_size(kStridedInputHeight, kStridedInputWidth)
10349 .padding(1)
10350 .kernel_size(3, 3)
10351 .stride(2)
10352 .group_input_channels(input_channels)
10353 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10354 .iterations(1)
10355 .TestF16();
10356 }
10357 }
10358
10359 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_output_channels) {
10360 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10361 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
10362 DeconvolutionOperatorTester()
10363 .input_size(kStridedInputHeight, kStridedInputWidth)
10364 .padding(1)
10365 .kernel_size(3, 3)
10366 .stride(2)
10367 .group_input_channels(23)
10368 .group_output_channels(output_channels)
10369 .iterations(1)
10370 .TestF16();
10371 }
10372 }
10373
10374 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_input_stride) {
10375 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10376 DeconvolutionOperatorTester()
10377 .input_size(kStridedInputHeight, kStridedInputWidth)
10378 .padding(1)
10379 .kernel_size(3, 3)
10380 .stride(2)
10381 .group_input_channels(23)
10382 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10383 .input_pixel_stride(28)
10384 .iterations(3)
10385 .TestF16();
10386 }
10387
10388 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_output_stride) {
10389 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10390 DeconvolutionOperatorTester()
10391 .input_size(kStridedInputHeight, kStridedInputWidth)
10392 .padding(1)
10393 .kernel_size(3, 3)
10394 .stride(2)
10395 .group_input_channels(23)
10396 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10397 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
10398 .iterations(3)
10399 .TestF16();
10400 }
10401
10402 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_qmin) {
10403 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10404 DeconvolutionOperatorTester()
10405 .input_size(kStridedInputHeight, kStridedInputWidth)
10406 .padding(1)
10407 .kernel_size(3, 3)
10408 .stride(2)
10409 .group_input_channels(23)
10410 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10411 .qmin(128)
10412 .iterations(3)
10413 .TestF16();
10414 }
10415
10416 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_qmax) {
10417 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10418 DeconvolutionOperatorTester()
10419 .input_size(kStridedInputHeight, kStridedInputWidth)
10420 .padding(1)
10421 .kernel_size(3, 3)
10422 .stride(2)
10423 .group_input_channels(23)
10424 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10425 .qmax(128)
10426 .iterations(3)
10427 .TestF16();
10428 }
10429
10430 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_without_bias) {
10431 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10432 DeconvolutionOperatorTester()
10433 .has_bias(false)
10434 .input_size(kStridedInputHeight, kStridedInputWidth)
10435 .padding(1)
10436 .kernel_size(3, 3)
10437 .stride(2)
10438 .group_input_channels(23)
10439 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10440 .iterations(3)
10441 .TestF16();
10442 }
10443
10444 TEST(DECONVOLUTION_NHWC_F16, weights_cache_3x3s2) {
10445 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10446 DeconvolutionOperatorTester()
10447 .input_size(kStridedInputHeight, kStridedInputWidth)
10448 .padding(1)
10449 .kernel_size(3, 3)
10450 .stride(2)
10451 .group_input_channels(15)
10452 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10453 .use_weights_cache(true)
10454 .iterations(3)
10455 .TestF16();
10456 }
10457
10458 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
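// Grouped variants of the strided SUBCONV2D/IGEMM path: stride(2) combined with groups(2).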
10459
10460 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2) {
10461 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10462 DeconvolutionOperatorTester()
10463 .input_size(kStridedInputHeight, kStridedInputWidth)
10464 .padding(1)
10465 .kernel_size(3, 3)
10466 .stride(2)
10467 .groups(2)
10468 .group_input_channels(17)
10469 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10470 .iterations(3)
10471 .TestF16();
10472 }
10473
10474 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_fp32_weights) {
10475 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10476 DeconvolutionOperatorTester()
10477 .input_size(kStridedInputHeight, kStridedInputWidth)
10478 .padding(1)
10479 .kernel_size(3, 3)
10480 .stride(2)
10481 .groups(2)
10482 .group_input_channels(17)
10483 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10484 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
10485 .iterations(3)
10486 .TestF16();
10487 }
10488
10489 TEST(DECONVOLUTION_NHWC_F16, grouped_Kx3s2) {
10490 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10491 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
10492 DeconvolutionOperatorTester()
10493 .input_size(kStridedInputHeight, kStridedInputWidth)
10494 .padding_width(1)
10495 .kernel_size(kernel_height, 3)
10496 .stride(2)
10497 .groups(2)
10498 .group_input_channels(17)
10499 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10500 .iterations(3)
10501 .TestF16();
10502 }
10503 }
10504
10505 TEST(DECONVOLUTION_NHWC_F16, grouped_3xKs2) {
10506 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10507 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
10508 DeconvolutionOperatorTester()
10509 .input_size(kStridedInputHeight, kStridedInputWidth)
10510 .padding_height(1)
10511 .kernel_size(3, kernel_width)
10512 .stride(2)
10513 .groups(2)
10514 .group_input_channels(17)
10515 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10516 .iterations(3)
10517 .TestF16();
10518 }
10519 }
10520
10521 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3sSx1) {
10522 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10523 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
10524 DeconvolutionOperatorTester()
10525 .input_size(kStridedInputHeight, kStridedInputWidth)
10526 .padding(1)
10527 .padding_width(1)
10528 .kernel_size(3, 3)
10529 .stride_height(stride_height)
10530 .groups(2)
10531 .group_input_channels(17)
10532 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10533 .iterations(3)
10534 .TestF16();
10535 }
10536 }
10537
10538 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s1xS) {
10539 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10540 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
10541 DeconvolutionOperatorTester()
10542 .input_size(kStridedInputHeight, kStridedInputWidth)
10543 .padding(1)
10544 .padding_width(1)
10545 .kernel_size(3, 3)
10546 .stride_width(stride_width)
10547 .groups(2)
10548 .group_input_channels(17)
10549 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10550 .iterations(3)
10551 .TestF16();
10552 }
10553 }
10554
10555 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_height_padding) {
10556 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10557 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
10558 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
10559 DeconvolutionOperatorTester()
10560 .input_size(kStridedInputHeight, kStridedInputWidth)
10561 .padding_width(1)
10562 .padding_top(padding_top)
10563 .padding_bottom(padding_bottom)
10564 .kernel_size(3, 3)
10565 .stride(2)
10566 .groups(2)
10567 .group_input_channels(17)
10568 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10569 .iterations(1)
10570 .TestF16();
10571 }
10572 }
10573 }
10574
10575 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_width_padding) {
10576 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10577 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
10578 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
10579 DeconvolutionOperatorTester()
10580 .input_size(kStridedInputHeight, kStridedInputWidth)
10581 .padding_height(1)
10582 .padding_left(padding_left)
10583 .padding_right(padding_right)
10584 .kernel_size(3, 3)
10585 .stride(2)
10586 .groups(2)
10587 .group_input_channels(17)
10588 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10589 .iterations(1)
10590 .TestF16();
10591 }
10592 }
10593 }
10594
10595 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_height_adjustment) {
10596 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10597 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
10598 DeconvolutionOperatorTester()
10599 .input_size(kStridedInputHeight, kStridedInputWidth)
10600 .padding(1)
10601 .adjustment_height(adjustment_height)
10602 .kernel_size(3, 3)
10603 .stride(2)
10604 .groups(2)
10605 .group_input_channels(17)
10606 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10607 .iterations(1)
10608 .TestF16();
10609 }
10610 }
10611
10612 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_width_adjustment) {
10613 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10614 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
10615 DeconvolutionOperatorTester()
10616 .input_size(kStridedInputHeight, kStridedInputWidth)
10617 .padding(1)
10618 .adjustment_width(adjustment_width)
10619 .kernel_size(3, 3)
10620 .stride(2)
10621 .groups(2)
10622 .group_input_channels(17)
10623 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10624 .iterations(1)
10625 .TestF16();
10626 }
10627 }
10628
10629 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_input_height) {
10630 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10631 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
10632 DeconvolutionOperatorTester()
10633 .input_size(input_height, kStridedInputWidth)
10634 .padding(1)
10635 .kernel_size(3, 3)
10636 .stride(2)
10637 .groups(2)
10638 .group_input_channels(17)
10639 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10640 .iterations(1)
10641 .TestF16();
10642 }
10643 }
10644
10645 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_input_width) {
10646 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10647 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
10648 DeconvolutionOperatorTester()
10649       .input_size(kStridedInputHeight, input_width)
10650 .padding(1)
10651 .kernel_size(3, 3)
10652 .stride(2)
10653 .groups(2)
10654 .group_input_channels(17)
10655 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10656 .iterations(1)
10657 .TestF16();
10658 }
10659 }
10660
10661 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_input_channels) {
10662 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10663 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
10664 DeconvolutionOperatorTester()
10665 .input_size(kStridedInputHeight, kStridedInputWidth)
10666 .padding(1)
10667 .kernel_size(3, 3)
10668 .stride(2)
10669 .groups(2)
10670 .group_input_channels(input_channels)
10671 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10672 .iterations(1)
10673 .TestF16();
10674 }
10675 }
10676
10677 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_output_channels) {
10678 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10679 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
10680 DeconvolutionOperatorTester()
10681 .input_size(kStridedInputHeight, kStridedInputWidth)
10682 .padding(1)
10683 .kernel_size(3, 3)
10684 .stride(2)
10685 .groups(2)
10686 .group_input_channels(17)
10687 .group_output_channels(output_channels)
10688 .iterations(1)
10689 .TestF16();
10690 }
10691 }
10692
10693 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_input_stride) {
10694 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10695 DeconvolutionOperatorTester()
10696 .input_size(kStridedInputHeight, kStridedInputWidth)
10697 .padding(1)
10698 .kernel_size(3, 3)
10699 .stride(2)
10700 .groups(2)
10701 .group_input_channels(17)
10702 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10703 .input_pixel_stride(37)
10704 .iterations(3)
10705 .TestF16();
10706 }
10707
10708 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_output_stride) {
10709 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10710 DeconvolutionOperatorTester()
10711 .input_size(kStridedInputHeight, kStridedInputWidth)
10712 .padding(1)
10713 .kernel_size(3, 3)
10714 .stride(2)
10715 .groups(2)
10716 .group_input_channels(17)
10717 .group_output_channels(xnn_params.f16.gemm.nr + 3)
10718 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
10719 .iterations(3)
10720 .TestF16();
10721 }
10722
10723 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_qmin) {
10724 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10725 DeconvolutionOperatorTester()
10726 .input_size(kStridedInputHeight, kStridedInputWidth)
10727 .padding(1)
10728 .kernel_size(3, 3)
10729 .stride(2)
10730 .groups(2)
10731 .group_input_channels(17)
10732 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10733 .qmin(128)
10734 .iterations(3)
10735 .TestF16();
10736 }
10737
10738 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_qmax) {
10739 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10740 DeconvolutionOperatorTester()
10741 .input_size(kStridedInputHeight, kStridedInputWidth)
10742 .padding(1)
10743 .kernel_size(3, 3)
10744 .stride(2)
10745 .groups(2)
10746 .group_input_channels(17)
10747 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10748 .qmax(128)
10749 .iterations(3)
10750 .TestF16();
10751 }
10752
10753 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_without_bias) {
10754 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10755 DeconvolutionOperatorTester()
10756 .has_bias(false)
10757 .input_size(kStridedInputHeight, kStridedInputWidth)
10758 .padding(1)
10759 .kernel_size(3, 3)
10760 .stride(2)
10761 .groups(2)
10762 .group_input_channels(17)
10763 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10764 .iterations(3)
10765 .TestF16();
10766 }
10767
10768 TEST(DECONVOLUTION_NHWC_F16, weights_cache_grouped_3x3s2) {
10769 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10770 DeconvolutionOperatorTester()
10771 .input_size(kStridedInputHeight, kStridedInputWidth)
10772 .padding(1)
10773 .kernel_size(3, 3)
10774 .stride(2)
10775 .groups(2)
10776 .group_input_channels(17)
10777 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10778 .use_weights_cache(true)
10779 .iterations(3)
10780 .TestF16();
10781 }
10782
10783 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
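// The tests below repeat the 3x3 stride-2 (subconvolution/IGEMM) coverage with batch_size(2) and no grouping.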
10784
10785 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2) {
10786 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10787 DeconvolutionOperatorTester()
10788 .batch_size(2)
10789 .input_size(kStridedInputHeight, kStridedInputWidth)
10790 .padding(1)
10791 .kernel_size(3, 3)
10792 .stride(2)
10793 .group_input_channels(15)
10794 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10795 .iterations(3)
10796 .TestF16();
10797 }
10798
10799 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_fp32_weights) {
10800 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10801 DeconvolutionOperatorTester()
10802 .batch_size(2)
10803 .input_size(kStridedInputHeight, kStridedInputWidth)
10804 .padding(1)
10805 .kernel_size(3, 3)
10806 .stride(2)
10807 .group_input_channels(15)
10808 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10809 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
10810 .iterations(3)
10811 .TestF16();
10812 }
10813
10814 TEST(DECONVOLUTION_NHWC_F16, batched_Kx3s2) {
10815 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10816 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
10817 DeconvolutionOperatorTester()
10818 .batch_size(2)
10819 .input_size(kStridedInputHeight, kStridedInputWidth)
10820 .padding_width(1)
10821 .kernel_size(kernel_height, 3)
10822 .stride(2)
10823 .group_input_channels(17)
10824 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10825 .iterations(3)
10826 .TestF16();
10827 }
10828 }
10829
10830 TEST(DECONVOLUTION_NHWC_F16, batched_3xKs2) {
10831 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10832 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
10833 DeconvolutionOperatorTester()
10834 .batch_size(2)
10835 .input_size(kStridedInputHeight, kStridedInputWidth)
10836 .padding_height(1)
10837 .kernel_size(3, kernel_width)
10838 .stride(2)
10839 .group_input_channels(17)
10840 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10841 .iterations(3)
10842 .TestF16();
10843 }
10844 }
10845
10846 TEST(DECONVOLUTION_NHWC_F16, batched_3x3sSx1) {
10847 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10848 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
10849 DeconvolutionOperatorTester()
10850 .batch_size(2)
10851 .input_size(kStridedInputHeight, kStridedInputWidth)
10852 .padding(1)
10853 .padding_width(1)
10854 .kernel_size(3, 3)
10855 .stride_height(stride_height)
10856 .group_input_channels(17)
10857 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10858 .iterations(3)
10859 .TestF16();
10860 }
10861 }
10862
10863 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s1xS) {
10864 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10865 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
10866 DeconvolutionOperatorTester()
10867 .batch_size(2)
10868 .input_size(kStridedInputHeight, kStridedInputWidth)
10869 .padding(1)
10870 .padding_width(1)
10871 .kernel_size(3, 3)
10872 .stride_width(stride_width)
10873 .group_input_channels(17)
10874 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10875 .iterations(3)
10876 .TestF16();
10877 }
10878 }
10879
10880 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_height_padding) {
10881 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10882 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
10883 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
10884 DeconvolutionOperatorTester()
10885 .batch_size(2)
10886 .input_size(kStridedInputHeight, kStridedInputWidth)
10887 .padding_width(1)
10888 .padding_top(padding_top)
10889 .padding_bottom(padding_bottom)
10890 .kernel_size(3, 3)
10891 .stride(2)
10892 .group_input_channels(15)
10893 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10894 .iterations(1)
10895 .TestF16();
10896 }
10897 }
10898 }
10899
10900 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_width_padding) {
10901 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10902 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
10903 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
10904 DeconvolutionOperatorTester()
10905 .batch_size(2)
10906 .input_size(kStridedInputHeight, kStridedInputWidth)
10907 .padding_height(1)
10908 .padding_left(padding_left)
10909 .padding_right(padding_right)
10910 .kernel_size(3, 3)
10911 .stride(2)
10912 .group_input_channels(15)
10913 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10914 .iterations(1)
10915 .TestF16();
10916 }
10917 }
10918 }
10919
10920 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_height_adjustment) {
10921 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10922 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
10923 DeconvolutionOperatorTester()
10924 .batch_size(2)
10925 .input_size(kStridedInputHeight, kStridedInputWidth)
10926 .padding(1)
10927 .adjustment_height(adjustment_height)
10928 .kernel_size(3, 3)
10929 .stride(2)
10930 .group_input_channels(15)
10931 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10932 .iterations(1)
10933 .TestF16();
10934 }
10935 }
10936
10937 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_width_adjustment) {
10938 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10939 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
10940 DeconvolutionOperatorTester()
10941 .batch_size(2)
10942 .input_size(kStridedInputHeight, kStridedInputWidth)
10943 .padding(1)
10944 .adjustment_width(adjustment_width)
10945 .kernel_size(3, 3)
10946 .stride(2)
10947 .group_input_channels(15)
10948 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10949 .iterations(1)
10950 .TestF16();
10951 }
10952 }
10953
10954 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_input_height) {
10955 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10956 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
10957 DeconvolutionOperatorTester()
10958 .batch_size(2)
10959 .input_size(input_height, kStridedInputWidth)
10960 .padding(1)
10961 .kernel_size(3, 3)
10962 .stride(2)
10963 .group_input_channels(15)
10964 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10965 .iterations(1)
10966 .TestF16();
10967 }
10968 }
10969
10970 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_input_width) {
10971 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10972 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
10973 DeconvolutionOperatorTester()
10974 .batch_size(2)
10975       .input_size(kStridedInputHeight, input_width)
10976 .padding(1)
10977 .kernel_size(3, 3)
10978 .stride(2)
10979 .group_input_channels(15)
10980 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10981 .iterations(1)
10982 .TestF16();
10983 }
10984 }
10985
10986 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_input_channels) {
10987 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10988 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
10989 DeconvolutionOperatorTester()
10990 .batch_size(2)
10991 .input_size(kStridedInputHeight, kStridedInputWidth)
10992 .padding(1)
10993 .kernel_size(3, 3)
10994 .stride(2)
10995 .group_input_channels(input_channels)
10996 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10997 .iterations(1)
10998 .TestF16();
10999 }
11000 }
11001
11002 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_output_channels) {
11003 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11004 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11005 DeconvolutionOperatorTester()
11006 .batch_size(2)
11007 .input_size(kStridedInputHeight, kStridedInputWidth)
11008 .padding(1)
11009 .kernel_size(3, 3)
11010 .stride(2)
11011 .group_input_channels(23)
11012 .group_output_channels(output_channels)
11013 .iterations(1)
11014 .TestF16();
11015 }
11016 }
11017
11018 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_input_stride) {
11019 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11020 DeconvolutionOperatorTester()
11021 .batch_size(2)
11022 .input_size(kStridedInputHeight, kStridedInputWidth)
11023 .padding(1)
11024 .kernel_size(3, 3)
11025 .stride(2)
11026 .group_input_channels(23)
11027 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11028 .input_pixel_stride(28)
11029 .iterations(3)
11030 .TestF16();
11031 }
11032
11033 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_output_stride) {
11034 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11035 DeconvolutionOperatorTester()
11036 .batch_size(2)
11037 .input_size(kStridedInputHeight, kStridedInputWidth)
11038 .padding(1)
11039 .kernel_size(3, 3)
11040 .stride(2)
11041 .group_input_channels(23)
11042 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11043 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11044 .iterations(3)
11045 .TestF16();
11046 }
11047
11048 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_qmin) {
11049 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11050 DeconvolutionOperatorTester()
11051 .batch_size(2)
11052 .input_size(kStridedInputHeight, kStridedInputWidth)
11053 .padding(1)
11054 .kernel_size(3, 3)
11055 .stride(2)
11056 .group_input_channels(23)
11057 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11058 .qmin(128)
11059 .iterations(3)
11060 .TestF16();
11061 }
11062
11063 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_qmax) {
11064 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11065 DeconvolutionOperatorTester()
11066 .batch_size(2)
11067 .input_size(kStridedInputHeight, kStridedInputWidth)
11068 .padding(1)
11069 .kernel_size(3, 3)
11070 .stride(2)
11071 .group_input_channels(23)
11072 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11073 .qmax(128)
11074 .iterations(3)
11075 .TestF16();
11076 }
11077
11078 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_without_bias) {
11079 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11080 DeconvolutionOperatorTester()
11081 .has_bias(false)
11082 .batch_size(2)
11083 .input_size(kStridedInputHeight, kStridedInputWidth)
11084 .padding(1)
11085 .kernel_size(3, 3)
11086 .stride(2)
11087 .group_input_channels(23)
11088 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11089 .iterations(3)
11090 .TestF16();
11091 }
11092
11093 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_3x3s2) {
11094 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11095 DeconvolutionOperatorTester()
11096 .batch_size(2)
11097 .input_size(kStridedInputHeight, kStridedInputWidth)
11098 .padding(1)
11099 .kernel_size(3, 3)
11100 .stride(2)
11101 .group_input_channels(15)
11102 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11103 .use_weights_cache(true)
11104 .iterations(3)
11105 .TestF16();
11106 }
11107
11108 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
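// Same 3x3 stride-2 cases again, now combining groups(2) with batch_size(2).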
11109
11110 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2) {
11111 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11112 DeconvolutionOperatorTester()
11113 .batch_size(2)
11114 .input_size(kStridedInputHeight, kStridedInputWidth)
11115 .padding(1)
11116 .kernel_size(3, 3)
11117 .stride(2)
11118 .groups(2)
11119 .group_input_channels(17)
11120 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11121 .iterations(3)
11122 .TestF16();
11123 }
11124
11125 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_fp32_weights) {
11126 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11127 DeconvolutionOperatorTester()
11128 .batch_size(2)
11129 .input_size(kStridedInputHeight, kStridedInputWidth)
11130 .padding(1)
11131 .kernel_size(3, 3)
11132 .stride(2)
11133 .groups(2)
11134 .group_input_channels(17)
11135 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11136 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11137 .iterations(3)
11138 .TestF16();
11139 }
11140
11141 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_Kx3s2) {
11142 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11143 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
11144 DeconvolutionOperatorTester()
11145 .batch_size(2)
11146 .input_size(kStridedInputHeight, kStridedInputWidth)
11147 .padding_width(1)
11148 .kernel_size(kernel_height, 3)
11149 .stride(2)
11150 .groups(2)
11151 .group_input_channels(17)
11152 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11153 .iterations(3)
11154 .TestF16();
11155 }
11156 }
11157
11158 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3xKs2) {
11159 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11160 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
11161 DeconvolutionOperatorTester()
11162 .batch_size(2)
11163 .input_size(kStridedInputHeight, kStridedInputWidth)
11164 .padding_height(1)
11165 .kernel_size(3, kernel_width)
11166 .stride(2)
11167 .groups(2)
11168 .group_input_channels(17)
11169 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11170 .iterations(3)
11171 .TestF16();
11172 }
11173 }
11174
11175 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3sSx1) {
11176 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11177 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
11178 DeconvolutionOperatorTester()
11179 .batch_size(2)
11180 .input_size(kStridedInputHeight, kStridedInputWidth)
11181 .padding(1)
11182 .padding_width(1)
11183 .kernel_size(3, 3)
11184 .stride_height(stride_height)
11185 .groups(2)
11186 .group_input_channels(17)
11187 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11188 .iterations(3)
11189 .TestF16();
11190 }
11191 }
11192
11193 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s1xS) {
11194 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11195 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
11196 DeconvolutionOperatorTester()
11197 .batch_size(2)
11198 .input_size(kStridedInputHeight, kStridedInputWidth)
11199 .padding(1)
11200 .padding_width(1)
11201 .kernel_size(3, 3)
11202 .stride_width(stride_width)
11203 .groups(2)
11204 .group_input_channels(17)
11205 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11206 .iterations(3)
11207 .TestF16();
11208 }
11209 }
11210
11211 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_height_padding) {
11212 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11213 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
11214 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
11215 DeconvolutionOperatorTester()
11216 .batch_size(2)
11217 .input_size(kStridedInputHeight, kStridedInputWidth)
11218 .padding_width(1)
11219 .padding_top(padding_top)
11220 .padding_bottom(padding_bottom)
11221 .kernel_size(3, 3)
11222 .stride(2)
11223 .groups(2)
11224 .group_input_channels(17)
11225 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11226 .iterations(1)
11227 .TestF16();
11228 }
11229 }
11230 }
11231
11232 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_width_padding) {
11233 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11234 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
11235 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
11236 DeconvolutionOperatorTester()
11237 .batch_size(2)
11238 .input_size(kStridedInputHeight, kStridedInputWidth)
11239 .padding_height(1)
11240 .padding_left(padding_left)
11241 .padding_right(padding_right)
11242 .kernel_size(3, 3)
11243 .stride(2)
11244 .groups(2)
11245 .group_input_channels(17)
11246 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11247 .iterations(1)
11248 .TestF16();
11249 }
11250 }
11251 }
11252
11253 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_height_adjustment) {
11254 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11255 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
11256 DeconvolutionOperatorTester()
11257 .batch_size(2)
11258 .input_size(kStridedInputHeight, kStridedInputWidth)
11259 .padding(1)
11260 .adjustment_height(adjustment_height)
11261 .kernel_size(3, 3)
11262 .stride(2)
11263 .groups(2)
11264 .group_input_channels(17)
11265 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11266 .iterations(1)
11267 .TestF16();
11268 }
11269 }
11270
11271 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_width_adjustment) {
11272 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11273 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
11274 DeconvolutionOperatorTester()
11275 .batch_size(2)
11276 .input_size(kStridedInputHeight, kStridedInputWidth)
11277 .padding(1)
11278 .adjustment_width(adjustment_width)
11279 .kernel_size(3, 3)
11280 .stride(2)
11281 .groups(2)
11282 .group_input_channels(17)
11283 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11284 .iterations(1)
11285 .TestF16();
11286 }
11287 }
11288
11289 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_input_height) {
11290 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11291 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
11292 DeconvolutionOperatorTester()
11293 .batch_size(2)
11294 .input_size(input_height, kStridedInputWidth)
11295 .padding(1)
11296 .kernel_size(3, 3)
11297 .stride(2)
11298 .groups(2)
11299 .group_input_channels(17)
11300 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11301 .iterations(1)
11302 .TestF16();
11303 }
11304 }
11305
11306 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_input_width) {
11307 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11308 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
11309 DeconvolutionOperatorTester()
11310 .batch_size(2)
11311       .input_size(kStridedInputHeight, input_width)
11312 .padding(1)
11313 .kernel_size(3, 3)
11314 .stride(2)
11315 .groups(2)
11316 .group_input_channels(17)
11317 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11318 .iterations(1)
11319 .TestF16();
11320 }
11321 }
11322
11323 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_input_channels) {
11324 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11325 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
11326 DeconvolutionOperatorTester()
11327 .batch_size(2)
11328 .input_size(kStridedInputHeight, kStridedInputWidth)
11329 .padding(1)
11330 .kernel_size(3, 3)
11331 .stride(2)
11332 .groups(2)
11333 .group_input_channels(input_channels)
11334 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11335 .iterations(1)
11336 .TestF16();
11337 }
11338 }
11339
11340 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_output_channels) {
11341 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11342 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11343 DeconvolutionOperatorTester()
11344 .batch_size(2)
11345 .input_size(kStridedInputHeight, kStridedInputWidth)
11346 .padding(1)
11347 .kernel_size(3, 3)
11348 .stride(2)
11349 .groups(2)
11350 .group_input_channels(17)
11351 .group_output_channels(output_channels)
11352 .iterations(1)
11353 .TestF16();
11354 }
11355 }
11356
11357 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_input_stride) {
11358 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11359 DeconvolutionOperatorTester()
11360 .batch_size(2)
11361 .input_size(kStridedInputHeight, kStridedInputWidth)
11362 .padding(1)
11363 .kernel_size(3, 3)
11364 .stride(2)
11365 .groups(2)
11366 .group_input_channels(17)
11367 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11368 .input_pixel_stride(37)
11369 .iterations(3)
11370 .TestF16();
11371 }
11372
11373 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_output_stride) {
11374 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11375 DeconvolutionOperatorTester()
11376 .batch_size(2)
11377 .input_size(kStridedInputHeight, kStridedInputWidth)
11378 .padding(1)
11379 .kernel_size(3, 3)
11380 .stride(2)
11381 .groups(2)
11382 .group_input_channels(17)
11383 .group_output_channels(xnn_params.f16.gemm.nr + 3)
11384 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11385 .iterations(3)
11386 .TestF16();
11387 }
11388
11389 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_qmin) {
11390 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11391 DeconvolutionOperatorTester()
11392 .batch_size(2)
11393 .input_size(kStridedInputHeight, kStridedInputWidth)
11394 .padding(1)
11395 .kernel_size(3, 3)
11396 .stride(2)
11397 .groups(2)
11398 .group_input_channels(17)
11399 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11400 .qmin(128)
11401 .iterations(3)
11402 .TestF16();
11403 }
11404
11405 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_qmax) {
11406 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11407 DeconvolutionOperatorTester()
11408 .batch_size(2)
11409 .input_size(kStridedInputHeight, kStridedInputWidth)
11410 .padding(1)
11411 .kernel_size(3, 3)
11412 .stride(2)
11413 .groups(2)
11414 .group_input_channels(17)
11415 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11416 .qmax(128)
11417 .iterations(3)
11418 .TestF16();
11419 }
11420
11421 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_without_bias) {
11422 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11423 DeconvolutionOperatorTester()
11424 .has_bias(false)
11425 .batch_size(2)
11426 .input_size(kStridedInputHeight, kStridedInputWidth)
11427 .padding(1)
11428 .kernel_size(3, 3)
11429 .stride(2)
11430 .groups(2)
11431 .group_input_channels(17)
11432 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11433 .iterations(3)
11434 .TestF16();
11435 }
11436
11437 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_grouped_3x3s2) {
11438 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11439 DeconvolutionOperatorTester()
11440 .batch_size(2)
11441 .input_size(kStridedInputHeight, kStridedInputWidth)
11442 .padding(1)
11443 .kernel_size(3, 3)
11444 .stride(2)
11445 .groups(2)
11446 .group_input_channels(17)
11447 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11448 .use_weights_cache(true)
11449 .iterations(3)
11450 .TestF16();
11451 }
11452
11453 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
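// Setup tests create the operator for input_size(), then re-run it at next_input_height() or
// next_input_width() to check that an existing operator can be reconfigured to a new input shape.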
11454
11455
11456 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_setup_changing_height) {
11457 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11458 DeconvolutionOperatorTester()
11459 .batch_size(2)
11460 .input_size(kStridedInputHeight, kStridedInputWidth)
11461 .next_input_height(kStridedInputHeight + 3)
11462 .kernel_size(3, 3)
11463 .stride(2)
11464 .groups(2)
11465 .group_input_channels(15)
11466 .group_output_channels(17)
11467 .TestSetupF16();
11468 }
11469
11470 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_setup_changing_width) {
11471 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11472 DeconvolutionOperatorTester()
11473 .batch_size(2)
11474 .input_size(kStridedInputHeight, kStridedInputWidth)
11475 .next_input_width(kStridedInputWidth + 3)
11476 .kernel_size(3, 3)
11477 .stride(2)
11478 .groups(2)
11479 .group_input_channels(15)
11480 .group_output_channels(17)
11481 .TestSetupF16();
11482 }
11483
11484 /**************************** SUBCONV2D/GEMM path ****************************/
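// In this section the kernel size equals the stride (2x2 with stride 2, Kx2 with stride Kx2,
// 2xK with stride 2xK), so kernel taps never overlap in the output. Assuming the usual
// transposed-convolution size formula, output = stride * (input - 1) + kernel + adjustment - padding,
// the unpadded 2x2s2 case maps the 6x5 strided input to a 12x10 output.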
11485
11486 TEST(DECONVOLUTION_NHWC_F16, 2x2s2) {
11487 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11488 DeconvolutionOperatorTester()
11489 .input_size(kStridedInputHeight, kStridedInputWidth)
11490 .kernel_size(2, 2)
11491 .stride(2)
11492 .group_input_channels(15)
11493 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11494 .iterations(3)
11495 .TestF16();
11496 }
11497
11498 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_fp32_weights) {
11499 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11500 DeconvolutionOperatorTester()
11501 .input_size(kStridedInputHeight, kStridedInputWidth)
11502 .kernel_size(2, 2)
11503 .stride(2)
11504 .group_input_channels(15)
11505 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11506 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11507 .iterations(3)
11508 .TestF16();
11509 }
11510
11511 TEST(DECONVOLUTION_NHWC_F16, Kx2sKx2) {
11512 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11513 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
11514 DeconvolutionOperatorTester()
11515 .input_size(kStridedInputHeight, kStridedInputWidth)
11516 .kernel_size(kernel_height, 2)
11517 .stride(kernel_height, 2)
11518 .group_input_channels(17)
11519 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11520 .iterations(3)
11521 .TestF16();
11522 }
11523 }
11524
11525 TEST(DECONVOLUTION_NHWC_F16, 2xKs2xK) {
11526 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11527 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
11528 DeconvolutionOperatorTester()
11529 .input_size(kStridedInputHeight, kStridedInputWidth)
11530 .kernel_size(2, kernel_width)
11531 .stride(2, kernel_width)
11532 .group_input_channels(17)
11533 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11534 .iterations(3)
11535 .TestF16();
11536 }
11537 }
11538
11539 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_height_adjustment) {
11540 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11541 DeconvolutionOperatorTester()
11542 .input_size(kStridedInputHeight, kStridedInputWidth)
11543 .adjustment_height(1)
11544 .kernel_size(2, 2)
11545 .stride(2)
11546 .group_input_channels(15)
11547 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11548 .iterations(1)
11549 .TestF16();
11550 }
11551
11552 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_width_adjustment) {
11553 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11554 DeconvolutionOperatorTester()
11555 .input_size(kStridedInputHeight, kStridedInputWidth)
11556 .adjustment_width(1)
11557 .kernel_size(2, 2)
11558 .stride(2)
11559 .group_input_channels(15)
11560 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11561 .iterations(1)
11562 .TestF16();
11563 }
11564
11565 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_input_height) {
11566 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11567 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
11568 DeconvolutionOperatorTester()
11569 .input_size(input_height, kStridedInputWidth)
11570 .kernel_size(2, 2)
11571 .stride(2)
11572 .group_input_channels(15)
11573 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11574 .iterations(1)
11575 .TestF16();
11576 }
11577 }
11578
11579 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_input_width) {
11580 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11581 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
11582 DeconvolutionOperatorTester()
11583       .input_size(kStridedInputHeight, input_width)
11584 .kernel_size(2, 2)
11585 .stride(2)
11586 .group_input_channels(15)
11587 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11588 .iterations(1)
11589 .TestF16();
11590 }
11591 }
11592
11593 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_input_channels) {
11594 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11595 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
11596 DeconvolutionOperatorTester()
11597 .input_size(kStridedInputHeight, kStridedInputWidth)
11598 .kernel_size(2, 2)
11599 .stride(2)
11600 .group_input_channels(input_channels)
11601 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11602 .iterations(1)
11603 .TestF16();
11604 }
11605 }
11606
11607 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_output_channels) {
11608 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11609 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11610 DeconvolutionOperatorTester()
11611 .input_size(kStridedInputHeight, kStridedInputWidth)
11612 .kernel_size(2, 2)
11613 .stride(2)
11614 .group_input_channels(23)
11615 .group_output_channels(output_channels)
11616 .iterations(1)
11617 .TestF16();
11618 }
11619 }
11620
11621 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_input_stride) {
11622 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11623 DeconvolutionOperatorTester()
11624 .input_size(kStridedInputHeight, kStridedInputWidth)
11625 .kernel_size(2, 2)
11626 .stride(2)
11627 .group_input_channels(23)
11628 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11629 .input_pixel_stride(28)
11630 .iterations(3)
11631 .TestF16();
11632 }
11633
11634 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_output_stride) {
11635 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11636 DeconvolutionOperatorTester()
11637 .input_size(kStridedInputHeight, kStridedInputWidth)
11638 .kernel_size(2, 2)
11639 .stride(2)
11640 .group_input_channels(23)
11641 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11642 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11643 .iterations(3)
11644 .TestF16();
11645 }
11646
11647 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_qmin) {
11648 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11649 DeconvolutionOperatorTester()
11650 .input_size(kStridedInputHeight, kStridedInputWidth)
11651 .kernel_size(2, 2)
11652 .stride(2)
11653 .group_input_channels(23)
11654 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11655 .qmin(128)
11656 .iterations(3)
11657 .TestF16();
11658 }
11659
11660 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_qmax) {
11661 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11662 DeconvolutionOperatorTester()
11663 .input_size(kStridedInputHeight, kStridedInputWidth)
11664 .kernel_size(2, 2)
11665 .stride(2)
11666 .group_input_channels(23)
11667 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11668 .qmax(128)
11669 .iterations(3)
11670 .TestF16();
11671 }
11672
11673 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_without_bias) {
11674 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11675 DeconvolutionOperatorTester()
11676 .has_bias(false)
11677 .input_size(kStridedInputHeight, kStridedInputWidth)
11678 .kernel_size(2, 2)
11679 .stride(2)
11680 .group_input_channels(23)
11681 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11682 .iterations(3)
11683 .TestF16();
11684 }
11685
11686 TEST(DECONVOLUTION_NHWC_F16, weights_cache_2x2s2) {
11687 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11688 DeconvolutionOperatorTester()
11689 .input_size(kStridedInputHeight, kStridedInputWidth)
11690 .kernel_size(2, 2)
11691 .stride(2)
11692 .group_input_channels(15)
11693 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11694 .use_weights_cache(true)
11695 .iterations(3)
11696 .TestF16();
11697 }
11698
11699 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
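// Grouped (groups(2)) variants of the 2x2 stride-2 tests above.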
11700
11701 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2) {
11702 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11703 DeconvolutionOperatorTester()
11704 .input_size(kStridedInputHeight, kStridedInputWidth)
11705 .kernel_size(2, 2)
11706 .stride(2)
11707 .groups(2)
11708 .group_input_channels(17)
11709 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11710 .iterations(3)
11711 .TestF16();
11712 }
11713
11714 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_fp32_weights) {
11715 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11716 DeconvolutionOperatorTester()
11717 .input_size(kStridedInputHeight, kStridedInputWidth)
11718 .kernel_size(2, 2)
11719 .stride(2)
11720 .groups(2)
11721 .group_input_channels(17)
11722 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11723 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11724 .iterations(3)
11725 .TestF16();
11726 }
11727
11728 TEST(DECONVOLUTION_NHWC_F16, grouped_Kx2sKx2) {
11729 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11730 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
11731 DeconvolutionOperatorTester()
11732 .input_size(kStridedInputHeight, kStridedInputWidth)
11733 .kernel_size(kernel_height, 2)
11734 .stride(kernel_height, 2)
11735 .groups(2)
11736 .group_input_channels(17)
11737 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11738 .iterations(3)
11739 .TestF16();
11740 }
11741 }
11742
11743 TEST(DECONVOLUTION_NHWC_F16, grouped_2xKs2xK) {
11744 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11745 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
11746 DeconvolutionOperatorTester()
11747 .input_size(kStridedInputHeight, kStridedInputWidth)
11748 .kernel_size(2, kernel_width)
11749 .stride(2, kernel_width)
11750 .groups(2)
11751 .group_input_channels(17)
11752 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11753 .iterations(3)
11754 .TestF16();
11755 }
11756 }
11757
11758 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_height_adjustment) {
11759 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11760 DeconvolutionOperatorTester()
11761 .input_size(kStridedInputHeight, kStridedInputWidth)
11762 .adjustment_height(1)
11763 .kernel_size(2, 2)
11764 .stride(2)
11765 .groups(2)
11766 .group_input_channels(17)
11767 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11768 .iterations(1)
11769 .TestF16();
11770 }
11771
11772 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_width_adjustment) {
11773 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11774 DeconvolutionOperatorTester()
11775 .input_size(kStridedInputHeight, kStridedInputWidth)
11776 .adjustment_width(1)
11777 .kernel_size(2, 2)
11778 .stride(2)
11779 .groups(2)
11780 .group_input_channels(17)
11781 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11782 .iterations(1)
11783 .TestF16();
11784 }
11785
11786 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_input_height) {
11787 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11788 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
11789 DeconvolutionOperatorTester()
11790 .input_size(input_height, kStridedInputWidth)
11791 .kernel_size(2, 2)
11792 .stride(2)
11793 .groups(2)
11794 .group_input_channels(17)
11795 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11796 .iterations(1)
11797 .TestF16();
11798 }
11799 }
11800
11801 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_input_width) {
11802 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11803 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
11804 DeconvolutionOperatorTester()
11805       .input_size(kStridedInputHeight, input_width)
11806 .kernel_size(2, 2)
11807 .stride(2)
11808 .groups(2)
11809 .group_input_channels(17)
11810 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11811 .iterations(1)
11812 .TestF16();
11813 }
11814 }
11815
11816 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_input_channels) {
11817 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11818 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
11819 DeconvolutionOperatorTester()
11820 .input_size(kStridedInputHeight, kStridedInputWidth)
11821 .kernel_size(2, 2)
11822 .stride(2)
11823 .groups(2)
11824 .group_input_channels(input_channels)
11825 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11826 .iterations(1)
11827 .TestF16();
11828 }
11829 }
11830
11831 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_output_channels) {
11832 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11833 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11834 DeconvolutionOperatorTester()
11835 .input_size(kStridedInputHeight, kStridedInputWidth)
11836 .kernel_size(2, 2)
11837 .stride(2)
11838 .groups(2)
11839 .group_input_channels(17)
11840 .group_output_channels(output_channels)
11841 .iterations(1)
11842 .TestF16();
11843 }
11844 }
11845
11846 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_input_stride) {
11847 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11848 DeconvolutionOperatorTester()
11849 .input_size(kStridedInputHeight, kStridedInputWidth)
11850 .kernel_size(2, 2)
11851 .stride(2)
11852 .groups(2)
11853 .group_input_channels(17)
11854 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11855 .input_pixel_stride(37)
11856 .iterations(3)
11857 .TestF16();
11858 }
11859
11860 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_output_stride) {
11861 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11862 DeconvolutionOperatorTester()
11863 .input_size(kStridedInputHeight, kStridedInputWidth)
11864 .kernel_size(2, 2)
11865 .stride(2)
11866 .groups(2)
11867 .group_input_channels(17)
11868 .group_output_channels(xnn_params.f16.gemm.nr + 3)
11869 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11870 .iterations(3)
11871 .TestF16();
11872 }
11873
11874 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_qmin) {
11875 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11876 DeconvolutionOperatorTester()
11877 .input_size(kStridedInputHeight, kStridedInputWidth)
11878 .kernel_size(2, 2)
11879 .stride(2)
11880 .groups(2)
11881 .group_input_channels(17)
11882 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11883 .qmin(128)
11884 .iterations(3)
11885 .TestF16();
11886 }
11887
11888 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_qmax) {
11889 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11890 DeconvolutionOperatorTester()
11891 .input_size(kStridedInputHeight, kStridedInputWidth)
11892 .kernel_size(2, 2)
11893 .stride(2)
11894 .groups(2)
11895 .group_input_channels(17)
11896 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11897 .qmax(128)
11898 .iterations(3)
11899 .TestF16();
11900 }
11901
11902 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_without_bias) {
11903 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11904 DeconvolutionOperatorTester()
11905 .has_bias(false)
11906 .input_size(kStridedInputHeight, kStridedInputWidth)
11907 .kernel_size(2, 2)
11908 .stride(2)
11909 .groups(2)
11910 .group_input_channels(17)
11911 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11912 .iterations(3)
11913 .TestF16();
11914 }
11915
11916 TEST(DECONVOLUTION_NHWC_F16, weights_cache_grouped_2x2s2) {
11917 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11918 DeconvolutionOperatorTester()
11919 .input_size(kStridedInputHeight, kStridedInputWidth)
11920 .kernel_size(2, 2)
11921 .stride(2)
11922 .groups(2)
11923 .group_input_channels(17)
11924 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11925 .use_weights_cache(true)
11926 .iterations(3)
11927 .TestF16();
11928 }
11929
11930 /**************************** SUBCONV2D/GEMM path, batched ****************************/
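// Batched (batch_size(2)) variants of the 2x2 stride-2 tests, without grouping.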
11931
11932 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2) {
11933 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11934 DeconvolutionOperatorTester()
11935 .batch_size(2)
11936 .input_size(kStridedInputHeight, kStridedInputWidth)
11937 .kernel_size(2, 2)
11938 .stride(2)
11939 .group_input_channels(15)
11940 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11941 .iterations(3)
11942 .TestF16();
11943 }
11944
11945 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_fp32_weights) {
11946 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11947 DeconvolutionOperatorTester()
11948 .batch_size(2)
11949 .input_size(kStridedInputHeight, kStridedInputWidth)
11950 .kernel_size(2, 2)
11951 .stride(2)
11952 .group_input_channels(15)
11953 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11954 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11955 .iterations(3)
11956 .TestF16();
11957 }
11958
11959 TEST(DECONVOLUTION_NHWC_F16, batched_Kx2sKx2) {
11960 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11961 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
11962 DeconvolutionOperatorTester()
11963 .batch_size(2)
11964 .input_size(kStridedInputHeight, kStridedInputWidth)
11965 .kernel_size(kernel_height, 2)
11966 .stride(kernel_height, 2)
11967 .group_input_channels(17)
11968 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11969 .iterations(3)
11970 .TestF16();
11971 }
11972 }
11973
11974 TEST(DECONVOLUTION_NHWC_F16, batched_2xKs2xK) {
11975 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11976 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
11977 DeconvolutionOperatorTester()
11978 .batch_size(2)
11979 .input_size(kStridedInputHeight, kStridedInputWidth)
11980 .kernel_size(2, kernel_width)
11981 .stride(2, kernel_width)
11982 .group_input_channels(17)
11983 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11984 .iterations(3)
11985 .TestF16();
11986 }
11987 }
11988
11989 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_height_adjustment) {
11990 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11991 DeconvolutionOperatorTester()
11992 .batch_size(2)
11993 .input_size(kStridedInputHeight, kStridedInputWidth)
11994 .adjustment_height(1)
11995 .kernel_size(2, 2)
11996 .stride(2)
11997 .group_input_channels(15)
11998 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11999 .iterations(1)
12000 .TestF16();
12001 }
12002
12003 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_width_adjustment) {
12004 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12005 DeconvolutionOperatorTester()
12006 .batch_size(2)
12007 .input_size(kStridedInputHeight, kStridedInputWidth)
12008 .adjustment_width(1)
12009 .kernel_size(2, 2)
12010 .stride(2)
12011 .group_input_channels(15)
12012 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12013 .iterations(1)
12014 .TestF16();
12015 }
12016
12017 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_input_height) {
12018 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12019 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
12020 DeconvolutionOperatorTester()
12021 .batch_size(2)
12022 .input_size(input_height, kStridedInputWidth)
12023 .kernel_size(2, 2)
12024 .stride(2)
12025 .group_input_channels(15)
12026 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12027 .iterations(1)
12028 .TestF16();
12029 }
12030 }
12031
12032 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_input_width) {
12033 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12034 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
12035 DeconvolutionOperatorTester()
12036 .batch_size(2)
12037       .input_size(kStridedInputHeight, input_width)
12038 .kernel_size(2, 2)
12039 .stride(2)
12040 .group_input_channels(15)
12041 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12042 .iterations(1)
12043 .TestF16();
12044 }
12045 }
12046
12047 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_input_channels) {
12048 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12049 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12050 DeconvolutionOperatorTester()
12051 .batch_size(2)
12052 .input_size(kStridedInputHeight, kStridedInputWidth)
12053 .kernel_size(2, 2)
12054 .stride(2)
12055 .group_input_channels(input_channels)
12056 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12057 .iterations(1)
12058 .TestF16();
12059 }
12060 }
12061
12062 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_output_channels) {
12063 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12064 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
12065 DeconvolutionOperatorTester()
12066 .batch_size(2)
12067 .input_size(kStridedInputHeight, kStridedInputWidth)
12068 .kernel_size(2, 2)
12069 .stride(2)
12070 .group_input_channels(23)
12071 .group_output_channels(output_channels)
12072 .iterations(1)
12073 .TestF16();
12074 }
12075 }
12076
12077 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_input_stride) {
12078 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12079 DeconvolutionOperatorTester()
12080 .batch_size(2)
12081 .input_size(kStridedInputHeight, kStridedInputWidth)
12082 .kernel_size(2, 2)
12083 .stride(2)
12084 .group_input_channels(23)
12085 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12086 .input_pixel_stride(28)
12087 .iterations(3)
12088 .TestF16();
12089 }
12090
12091 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_output_stride) {
12092 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12093 DeconvolutionOperatorTester()
12094 .batch_size(2)
12095 .input_size(kStridedInputHeight, kStridedInputWidth)
12096 .kernel_size(2, 2)
12097 .stride(2)
12098 .group_input_channels(23)
12099 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12100 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
12101 .iterations(3)
12102 .TestF16();
12103 }
12104
12105 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_qmin) {
12106 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12107 DeconvolutionOperatorTester()
12108 .batch_size(2)
12109 .input_size(kStridedInputHeight, kStridedInputWidth)
12110 .kernel_size(2, 2)
12111 .stride(2)
12112 .group_input_channels(23)
12113 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12114 .qmin(128)
12115 .iterations(3)
12116 .TestF16();
12117 }
12118
12119 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_qmax) {
12120 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12121 DeconvolutionOperatorTester()
12122 .batch_size(2)
12123 .input_size(kStridedInputHeight, kStridedInputWidth)
12124 .kernel_size(2, 2)
12125 .stride(2)
12126 .group_input_channels(23)
12127 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12128 .qmax(128)
12129 .iterations(3)
12130 .TestF16();
12131 }
12132
12133 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_without_bias) {
12134 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12135 DeconvolutionOperatorTester()
12136 .has_bias(false)
12137 .batch_size(2)
12138 .input_size(kStridedInputHeight, kStridedInputWidth)
12139 .kernel_size(2, 2)
12140 .stride(2)
12141 .group_input_channels(23)
12142 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12143 .iterations(3)
12144 .TestF16();
12145 }
12146
12147 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_2x2s2) {
12148 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12149 DeconvolutionOperatorTester()
12150 .batch_size(2)
12151 .input_size(kStridedInputHeight, kStridedInputWidth)
12152 .kernel_size(2, 2)
12153 .stride(2)
12154 .group_input_channels(15)
12155 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12156 .use_weights_cache(true)
12157 .iterations(3)
12158 .TestF16();
12159 }
12160
12161 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
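// Note: in the subconvolution cases the stride equals the kernel size (e.g. a 2x2 kernel
// with stride 2), so every output pixel is produced by exactly one input pixel and one
// kernel tap, and the transposed convolution decomposes into independent per-offset GEMMs.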
12162
12163 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2) {
12164 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12165 DeconvolutionOperatorTester()
12166 .batch_size(2)
12167 .input_size(kStridedInputHeight, kStridedInputWidth)
12168 .kernel_size(2, 2)
12169 .stride(2)
12170 .groups(2)
12171 .group_input_channels(17)
12172 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12173 .iterations(3)
12174 .TestF16();
12175 }
12176
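// The _with_fp32_weights variant feeds single-precision kernel data to the half-precision
// operator; the weights are expected to be converted to FP16 when they are packed.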
12177 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_fp32_weights) {
12178 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12179 DeconvolutionOperatorTester()
12180 .batch_size(2)
12181 .input_size(kStridedInputHeight, kStridedInputWidth)
12182 .kernel_size(2, 2)
12183 .stride(2)
12184 .groups(2)
12185 .group_input_channels(17)
12186 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12187 .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
12188 .iterations(3)
12189 .TestF16();
12190 }
12191
12192 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_Kx2sKx2) {
12193 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12194 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
12195 DeconvolutionOperatorTester()
12196 .batch_size(2)
12197 .input_size(kStridedInputHeight, kStridedInputWidth)
12198 .kernel_size(kernel_height, 2)
12199 .stride(kernel_height, 2)
12200 .groups(2)
12201 .group_input_channels(17)
12202 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12203 .iterations(3)
12204 .TestF16();
12205 }
12206 }
12207
12208 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2xKs2xK) {
12209 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12210 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
12211 DeconvolutionOperatorTester()
12212 .batch_size(2)
12213 .input_size(kStridedInputHeight, kStridedInputWidth)
12214 .kernel_size(2, kernel_width)
12215 .stride(2, kernel_width)
12216 .groups(2)
12217 .group_input_channels(17)
12218 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12219 .iterations(3)
12220 .TestF16();
12221 }
12222 }
12223
12224 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_height_adjustment) {
12225 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12226 DeconvolutionOperatorTester()
12227 .batch_size(2)
12228 .input_size(kStridedInputHeight, kStridedInputWidth)
12229 .adjustment_height(1)
12230 .kernel_size(2, 2)
12231 .stride(2)
12232 .groups(2)
12233 .group_input_channels(17)
12234 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12235 .iterations(1)
12236 .TestF16();
12237 }
12238
12239 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_width_adjustment) {
12240 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12241 DeconvolutionOperatorTester()
12242 .batch_size(2)
12243 .input_size(kStridedInputHeight, kStridedInputWidth)
12244 .adjustment_width(1)
12245 .kernel_size(2, 2)
12246 .stride(2)
12247 .groups(2)
12248 .group_input_channels(17)
12249 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12250 .iterations(1)
12251 .TestF16();
12252 }
12253
12254 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_input_height) {
12255 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12256 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
12257 DeconvolutionOperatorTester()
12258 .batch_size(2)
12259 .input_size(input_height, kStridedInputWidth)
12260 .kernel_size(2, 2)
12261 .stride(2)
12262 .groups(2)
12263 .group_input_channels(17)
12264 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12265 .iterations(1)
12266 .TestF16();
12267 }
12268 }
12269
12270 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_input_width) {
12271 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12272 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
12273 DeconvolutionOperatorTester()
12274 .batch_size(2)
12275 .input_size(kStridedInputHeight, input_width)
12276 .kernel_size(2, 2)
12277 .stride(2)
12278 .groups(2)
12279 .group_input_channels(17)
12280 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12281 .iterations(1)
12282 .TestF16();
12283 }
12284 }
12285
12286 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_input_channels) {
12287 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12288 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
12289 DeconvolutionOperatorTester()
12290 .batch_size(2)
12291 .input_size(kStridedInputHeight, kStridedInputWidth)
12292 .kernel_size(2, 2)
12293 .stride(2)
12294 .groups(2)
12295 .group_input_channels(input_channels)
12296 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12297 .iterations(1)
12298 .TestF16();
12299 }
12300 }
12301
12302 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_output_channels) {
12303 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12304 for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
12305 DeconvolutionOperatorTester()
12306 .batch_size(2)
12307 .input_size(kStridedInputHeight, kStridedInputWidth)
12308 .kernel_size(2, 2)
12309 .stride(2)
12310 .groups(2)
12311 .group_input_channels(17)
12312 .group_output_channels(output_channels)
12313 .iterations(1)
12314 .TestF16();
12315 }
12316 }
12317
12318 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_input_stride) {
12319 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12320 DeconvolutionOperatorTester()
12321 .batch_size(2)
12322 .input_size(kStridedInputHeight, kStridedInputWidth)
12323 .kernel_size(2, 2)
12324 .stride(2)
12325 .groups(2)
12326 .group_input_channels(17)
12327 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12328 .input_pixel_stride(37)
12329 .iterations(3)
12330 .TestF16();
12331 }
12332
12333 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_output_stride) {
12334 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12335 DeconvolutionOperatorTester()
12336 .batch_size(2)
12337 .input_size(kStridedInputHeight, kStridedInputWidth)
12338 .kernel_size(2, 2)
12339 .stride(2)
12340 .groups(2)
12341 .group_input_channels(17)
12342 .group_output_channels(xnn_params.f16.gemm.nr + 3)
12343 .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
12344 .iterations(3)
12345 .TestF16();
12346 }
12347
12348 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_qmin) {
12349 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12350 DeconvolutionOperatorTester()
12351 .batch_size(2)
12352 .input_size(kStridedInputHeight, kStridedInputWidth)
12353 .kernel_size(2, 2)
12354 .stride(2)
12355 .groups(2)
12356 .group_input_channels(17)
12357 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12358 .qmin(128)
12359 .iterations(3)
12360 .TestF16();
12361 }
12362
12363 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_qmax) {
12364 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12365 DeconvolutionOperatorTester()
12366 .batch_size(2)
12367 .input_size(kStridedInputHeight, kStridedInputWidth)
12368 .kernel_size(2, 2)
12369 .stride(2)
12370 .groups(2)
12371 .group_input_channels(17)
12372 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12373 .qmax(128)
12374 .iterations(3)
12375 .TestF16();
12376 }
12377
12378 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_without_bias) {
12379 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12380 DeconvolutionOperatorTester()
12381 .has_bias(false)
12382 .batch_size(2)
12383 .input_size(kStridedInputHeight, kStridedInputWidth)
12384 .kernel_size(2, 2)
12385 .stride(2)
12386 .groups(2)
12387 .group_input_channels(17)
12388 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12389 .iterations(3)
12390 .TestF16();
12391 }
12392
12393 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_grouped_2x2s2) {
12394 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12395 DeconvolutionOperatorTester()
12396 .batch_size(2)
12397 .input_size(kStridedInputHeight, kStridedInputWidth)
12398 .kernel_size(2, 2)
12399 .stride(2)
12400 .groups(2)
12401 .group_input_channels(17)
12402 .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12403 .use_weights_cache(true)
12404 .iterations(3)
12405 .TestF16();
12406 }
12407
12408 /**************************** SUBCONV2D/GEMM path, setup ****************************/
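// The setup tests create one operator, run it, then change the batch size or the input
// height/width (next_batch_size / next_input_height / next_input_width) and run it again,
// which presumably exercises the operator's re-setup logic rather than a fresh creation.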
12409
12410 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_setup_changing_batch) {
12411 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12412 DeconvolutionOperatorTester()
12413 .batch_size(2)
12414 .next_batch_size(5)
12415 .input_size(kStridedInputHeight, kStridedInputWidth)
12416 .kernel_size(2, 2)
12417 .stride(2)
12418 .groups(2)
12419 .group_input_channels(15)
12420 .group_output_channels(17)
12421 .TestSetupF16();
12422 }
12423
12424 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_setup_changing_height) {
12425 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12426 DeconvolutionOperatorTester()
12427 .batch_size(2)
12428 .input_size(kStridedInputHeight, kStridedInputWidth)
12429 .next_input_height(kStridedInputHeight + 3)
12430 .kernel_size(2, 2)
12431 .stride(2)
12432 .groups(2)
12433 .group_input_channels(15)
12434 .group_output_channels(17)
12435 .TestSetupF16();
12436 }
12437
12438 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_setup_changing_width) {
12439 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12440 DeconvolutionOperatorTester()
12441 .batch_size(2)
12442 .input_size(kStridedInputHeight, kStridedInputWidth)
12443 .next_input_width(kStridedInputWidth + 3)
12444 .kernel_size(2, 2)
12445 .stride(2)
12446 .groups(2)
12447 .group_input_channels(15)
12448 .group_output_channels(17)
12449 .TestSetupF16();
12450 }
12451
12452 /**************************** Future GEMM path ****************************/
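// A 1x1 deconvolution with unit stride and no padding is just a per-pixel matrix
// multiplication, making these cases natural candidates for a dedicated GEMM path
// (hence "future"); until then they presumably run through the generic convolution path.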
12453
12454 TEST(DECONVOLUTION_NHWC_F32, 1x1) {
12455 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12456 DeconvolutionOperatorTester()
12457 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12458 .kernel_size(1, 1)
12459 .group_input_channels(23)
12460 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12461 .iterations(3)
12462 .TestF32();
12463 }
12464
12465 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_input_width) {
12466 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12467 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12468 DeconvolutionOperatorTester()
12469 .input_size(input_height, kUnstridedInputWidth)
12470 .kernel_size(1, 1)
12471 .group_input_channels(23)
12472 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12473 .iterations(1)
12474 .TestF32();
12475 }
12476 }
12477
12478 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_input_height) {
12479 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12480 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12481 DeconvolutionOperatorTester()
12482 .input_size(kUnstridedInputHeight, input_width)
12483 .kernel_size(1, 1)
12484 .group_input_channels(23)
12485 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12486 .iterations(1)
12487 .TestF32();
12488 }
12489 }
12490
12491 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_input_channels) {
12492 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12493 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12494 DeconvolutionOperatorTester()
12495 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12496 .kernel_size(1, 1)
12497 .group_input_channels(input_channels)
12498 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12499 .iterations(1)
12500 .TestF32();
12501 }
12502 }
12503
12504 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_output_channels) {
12505 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12506 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12507 DeconvolutionOperatorTester()
12508 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12509 .kernel_size(1, 1)
12510 .group_input_channels(23)
12511 .group_output_channels(output_channels)
12512 .iterations(1)
12513 .TestF32();
12514 }
12515 }
12516
12517 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_input_stride) {
12518 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12519 DeconvolutionOperatorTester()
12520 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12521 .kernel_size(1, 1)
12522 .group_input_channels(23)
12523 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12524 .input_pixel_stride(28)
12525 .iterations(3)
12526 .TestF32();
12527 }
12528
12529 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_output_stride) {
12530 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12531 DeconvolutionOperatorTester()
12532 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12533 .kernel_size(1, 1)
12534 .group_input_channels(23)
12535 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12536 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12537 .iterations(3)
12538 .TestF32();
12539 }
12540
12541 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_qmin) {
12542 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12543 DeconvolutionOperatorTester()
12544 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12545 .kernel_size(1, 1)
12546 .group_input_channels(23)
12547 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12548 .qmin(128)
12549 .iterations(3)
12550 .TestF32();
12551 }
12552
12553 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_qmax) {
12554 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12555 DeconvolutionOperatorTester()
12556 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12557 .kernel_size(1, 1)
12558 .group_input_channels(23)
12559 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12560 .qmax(128)
12561 .iterations(3)
12562 .TestF32();
12563 }
12564
12565 TEST(DECONVOLUTION_NHWC_F32, 1x1_without_bias) {
12566 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12567 DeconvolutionOperatorTester()
12568 .has_bias(false)
12569 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12570 .kernel_size(1, 1)
12571 .group_input_channels(23)
12572 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12573 .iterations(3)
12574 .TestF32();
12575 }
12576
12577 /**************************** Future GEMM path, grouped ****************************/
12578
12579 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1) {
12580 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12581 DeconvolutionOperatorTester()
12582 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12583 .kernel_size(1, 1)
12584 .groups(2)
12585 .group_input_channels(23)
12586 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12587 .iterations(3)
12588 .TestF32();
12589 }
12590
12591 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_input_width) {
12592 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12593 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12594 DeconvolutionOperatorTester()
12595 .input_size(input_height, kUnstridedInputWidth)
12596 .kernel_size(1, 1)
12597 .groups(2)
12598 .group_input_channels(23)
12599 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12600 .iterations(1)
12601 .TestF32();
12602 }
12603 }
12604
12605 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_input_height) {
12606 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12607 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12608 DeconvolutionOperatorTester()
12609 .input_size(kUnstridedInputHeight, input_width)
12610 .kernel_size(1, 1)
12611 .groups(2)
12612 .group_input_channels(23)
12613 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12614 .iterations(1)
12615 .TestF32();
12616 }
12617 }
12618
12619 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_input_channels) {
12620 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12621 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12622 DeconvolutionOperatorTester()
12623 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12624 .kernel_size(1, 1)
12625 .groups(2)
12626 .group_input_channels(input_channels)
12627 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12628 .iterations(1)
12629 .TestF32();
12630 }
12631 }
12632
12633 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_output_channels) {
12634 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12635 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12636 DeconvolutionOperatorTester()
12637 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12638 .kernel_size(1, 1)
12639 .groups(2)
12640 .group_input_channels(23)
12641 .group_output_channels(output_channels)
12642 .iterations(1)
12643 .TestF32();
12644 }
12645 }
12646
12647 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_input_stride) {
12648 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12649 DeconvolutionOperatorTester()
12650 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12651 .kernel_size(1, 1)
12652 .groups(2)
12653 .group_input_channels(23)
12654 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12655 .input_pixel_stride(47)
12656 .iterations(3)
12657 .TestF32();
12658 }
12659
12660 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_output_stride) {
12661 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12662 DeconvolutionOperatorTester()
12663 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12664 .kernel_size(1, 1)
12665 .groups(2)
12666 .group_input_channels(23)
12667 .group_output_channels(xnn_params.f32.gemm.nr + 3)
12668 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12669 .iterations(3)
12670 .TestF32();
12671 }
12672
12673 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_qmin) {
12674 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12675 DeconvolutionOperatorTester()
12676 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12677 .kernel_size(1, 1)
12678 .groups(2)
12679 .group_input_channels(23)
12680 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12681 .qmin(128)
12682 .iterations(3)
12683 .TestF32();
12684 }
12685
12686 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_qmax) {
12687 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12688 DeconvolutionOperatorTester()
12689 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12690 .kernel_size(1, 1)
12691 .groups(2)
12692 .group_input_channels(23)
12693 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12694 .qmax(128)
12695 .iterations(3)
12696 .TestF32();
12697 }
12698
12699 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_without_bias) {
12700 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12701 DeconvolutionOperatorTester()
12702 .has_bias(false)
12703 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12704 .kernel_size(1, 1)
12705 .groups(2)
12706 .group_input_channels(23)
12707 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12708 .iterations(3)
12709 .TestF32();
12710 }
12711
12712 /**************************** Future GEMM path, batched ****************************/
12713
12714 TEST(DECONVOLUTION_NHWC_F32, batched_1x1) {
12715 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12716 DeconvolutionOperatorTester()
12717 .batch_size(2)
12718 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12719 .kernel_size(1, 1)
12720 .group_input_channels(23)
12721 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12722 .iterations(3)
12723 .TestF32();
12724 }
12725
12726 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_input_width) {
12727 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12728 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12729 DeconvolutionOperatorTester()
12730 .batch_size(2)
12731 .input_size(input_height, kUnstridedInputWidth)
12732 .kernel_size(1, 1)
12733 .group_input_channels(23)
12734 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12735 .iterations(1)
12736 .TestF32();
12737 }
12738 }
12739
12740 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_input_height) {
12741 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12742 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12743 DeconvolutionOperatorTester()
12744 .batch_size(2)
12745 .input_size(kUnstridedInputHeight, input_width)
12746 .kernel_size(1, 1)
12747 .group_input_channels(23)
12748 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12749 .iterations(1)
12750 .TestF32();
12751 }
12752 }
12753
12754 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_input_channels) {
12755 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12756 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12757 DeconvolutionOperatorTester()
12758 .batch_size(2)
12759 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12760 .kernel_size(1, 1)
12761 .group_input_channels(input_channels)
12762 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12763 .iterations(1)
12764 .TestF32();
12765 }
12766 }
12767
12768 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_output_channels) {
12769 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12770 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12771 DeconvolutionOperatorTester()
12772 .batch_size(2)
12773 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12774 .kernel_size(1, 1)
12775 .group_input_channels(23)
12776 .group_output_channels(output_channels)
12777 .iterations(1)
12778 .TestF32();
12779 }
12780 }
12781
12782 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_input_stride) {
12783 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12784 DeconvolutionOperatorTester()
12785 .batch_size(2)
12786 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12787 .kernel_size(1, 1)
12788 .group_input_channels(23)
12789 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12790 .input_pixel_stride(28)
12791 .iterations(3)
12792 .TestF32();
12793 }
12794
12795 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_output_stride) {
12796 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12797 DeconvolutionOperatorTester()
12798 .batch_size(2)
12799 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12800 .kernel_size(1, 1)
12801 .group_input_channels(23)
12802 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12803 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12804 .iterations(3)
12805 .TestF32();
12806 }
12807
12808 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_qmin) {
12809 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12810 DeconvolutionOperatorTester()
12811 .batch_size(2)
12812 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12813 .kernel_size(1, 1)
12814 .group_input_channels(23)
12815 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12816 .qmin(128)
12817 .iterations(3)
12818 .TestF32();
12819 }
12820
12821 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_qmax) {
12822 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12823 DeconvolutionOperatorTester()
12824 .batch_size(2)
12825 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12826 .kernel_size(1, 1)
12827 .group_input_channels(23)
12828 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12829 .qmax(128)
12830 .iterations(3)
12831 .TestF32();
12832 }
12833
12834 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_without_bias) {
12835 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12836 DeconvolutionOperatorTester()
12837 .has_bias(false)
12838 .batch_size(2)
12839 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12840 .kernel_size(1, 1)
12841 .group_input_channels(23)
12842 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12843 .iterations(3)
12844 .TestF32();
12845 }
12846
12847 /**************************** Future GEMM path, batched, grouped ****************************/
12848
12849 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1) {
12850 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12851 DeconvolutionOperatorTester()
12852 .batch_size(2)
12853 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12854 .kernel_size(1, 1)
12855 .groups(2)
12856 .group_input_channels(23)
12857 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12858 .iterations(3)
12859 .TestF32();
12860 }
12861
12862 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_input_width) {
12863 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12864 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12865 DeconvolutionOperatorTester()
12866 .batch_size(2)
12867 .input_size(input_height, kUnstridedInputWidth)
12868 .kernel_size(1, 1)
12869 .groups(2)
12870 .group_input_channels(23)
12871 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12872 .iterations(1)
12873 .TestF32();
12874 }
12875 }
12876
12877 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_input_height) {
12878 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12879 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12880 DeconvolutionOperatorTester()
12881 .batch_size(2)
12882 .input_size(kUnstridedInputHeight, input_width)
12883 .kernel_size(1, 1)
12884 .groups(2)
12885 .group_input_channels(23)
12886 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12887 .iterations(1)
12888 .TestF32();
12889 }
12890 }
12891
12892 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_input_channels) {
12893 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12894 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12895 DeconvolutionOperatorTester()
12896 .batch_size(2)
12897 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12898 .kernel_size(1, 1)
12899 .groups(2)
12900 .group_input_channels(input_channels)
12901 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12902 .iterations(1)
12903 .TestF32();
12904 }
12905 }
12906
12907 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_output_channels) {
12908 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12909 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12910 DeconvolutionOperatorTester()
12911 .batch_size(2)
12912 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12913 .kernel_size(1, 1)
12914 .groups(2)
12915 .group_input_channels(23)
12916 .group_output_channels(output_channels)
12917 .iterations(1)
12918 .TestF32();
12919 }
12920 }
12921
12922 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_input_stride) {
12923 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12924 DeconvolutionOperatorTester()
12925 .batch_size(2)
12926 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12927 .kernel_size(1, 1)
12928 .groups(2)
12929 .group_input_channels(23)
12930 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12931 .input_pixel_stride(47)
12932 .iterations(3)
12933 .TestF32();
12934 }
12935
12936 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_output_stride) {
12937 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12938 DeconvolutionOperatorTester()
12939 .batch_size(2)
12940 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12941 .kernel_size(1, 1)
12942 .groups(2)
12943 .group_input_channels(23)
12944 .group_output_channels(xnn_params.f32.gemm.nr + 3)
12945 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12946 .iterations(3)
12947 .TestF32();
12948 }
12949
12950 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_qmin) {
12951 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12952 DeconvolutionOperatorTester()
12953 .batch_size(2)
12954 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12955 .kernel_size(1, 1)
12956 .groups(2)
12957 .group_input_channels(23)
12958 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12959 .qmin(128)
12960 .iterations(3)
12961 .TestF32();
12962 }
12963
12964 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_qmax) {
12965 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12966 DeconvolutionOperatorTester()
12967 .batch_size(2)
12968 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12969 .kernel_size(1, 1)
12970 .groups(2)
12971 .group_input_channels(23)
12972 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12973 .qmax(128)
12974 .iterations(3)
12975 .TestF32();
12976 }
12977
12978 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_without_bias) {
12979 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12980 DeconvolutionOperatorTester()
12981 .has_bias(false)
12982 .batch_size(2)
12983 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12984 .kernel_size(1, 1)
12985 .groups(2)
12986 .group_input_channels(23)
12987 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12988 .iterations(3)
12989 .TestF32();
12990 }
12991
12992 /**************************** CONV path ****************************/
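// These 3x3 cases (including the dilated and strided variants below) presumably go through
// the general convolution-style (IGEMM) implementation, since a unit-stride deconvolution is
// equivalent to a regular convolution with a spatially flipped kernel over padded input.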
12993
12994 TEST(DECONVOLUTION_NHWC_F32, 3x3) {
12995 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12996 DeconvolutionOperatorTester()
12997 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12998 .padding(1)
12999 .kernel_size(3, 3)
13000 .group_input_channels(15)
13001 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13002 .iterations(3)
13003 .TestF32();
13004 }
13005
13006 TEST(DECONVOLUTION_NHWC_F32, Kx3) {
13007 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13008 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13009 DeconvolutionOperatorTester()
13010 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13011 .padding_width(1)
13012 .kernel_size(kernel_height, 3)
13013 .group_input_channels(17)
13014 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13015 .iterations(3)
13016 .TestF32();
13017 }
13018 }
13019
13020 TEST(DECONVOLUTION_NHWC_F32, 3xK) {
13021 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13022 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13023 DeconvolutionOperatorTester()
13024 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13025 .padding_height(1)
13026 .kernel_size(3, kernel_width)
13027 .group_input_channels(17)
13028 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13029 .iterations(3)
13030 .TestF32();
13031 }
13032 }
13033
13034 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_height_padding) {
13035 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13036 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13037 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13038 DeconvolutionOperatorTester()
13039 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13040 .padding_width(1)
13041 .padding_top(padding_top)
13042 .padding_bottom(padding_bottom)
13043 .kernel_size(3, 3)
13044 .group_input_channels(15)
13045 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13046 .iterations(1)
13047 .TestF32();
13048 }
13049 }
13050 }
13051
13052 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_width_padding) {
13053 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13054 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
13055 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
13056 DeconvolutionOperatorTester()
13057 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13058 .padding_height(1)
13059 .padding_left(padding_left)
13060 .padding_right(padding_right)
13061 .kernel_size(3, 3)
13062 .group_input_channels(15)
13063 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13064 .iterations(1)
13065 .TestF32();
13066 }
13067 }
13068 }
13069
13070 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_height_adjustment) {
13071 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13072 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
13073 DeconvolutionOperatorTester()
13074 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13075 .padding(1)
13076 .stride_height(adjustment_height + 1)
13077 .adjustment_height(adjustment_height)
13078 .kernel_size(3, 3)
13079 .group_input_channels(15)
13080 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13081 .iterations(1)
13082 .TestF32();
13083 }
13084 }
13085
13086 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_width_adjustment) {
13087 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13088 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
13089 DeconvolutionOperatorTester()
13090 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13091 .padding(1)
13092 .stride_width(adjustment_width + 1)
13093 .adjustment_width(adjustment_width)
13094 .kernel_size(3, 3)
13095 .group_input_channels(15)
13096 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13097 .iterations(1)
13098 .TestF32();
13099 }
13100 }
13101
13102 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_input_height) {
13103 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13104 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
13105 DeconvolutionOperatorTester()
13106 .input_size(input_height, kUnstridedInputWidth)
13107 .padding(1)
13108 .kernel_size(3, 3)
13109 .group_input_channels(15)
13110 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13111 .iterations(1)
13112 .TestF32();
13113 }
13114 }
13115
13116 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_input_width) {
13117 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13118 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
13119 DeconvolutionOperatorTester()
13120 .input_size(kUnstridedInputHeight, input_width)
13121 .padding(1)
13122 .kernel_size(3, 3)
13123 .group_input_channels(15)
13124 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13125 .iterations(1)
13126 .TestF32();
13127 }
13128 }
13129
13130 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_input_channels) {
13131 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13132 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
13133 DeconvolutionOperatorTester()
13134 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13135 .padding(1)
13136 .kernel_size(3, 3)
13137 .group_input_channels(input_channels)
13138 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13139 .iterations(1)
13140 .TestF32();
13141 }
13142 }
13143
13144 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_output_channels) {
13145 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13146 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
13147 DeconvolutionOperatorTester()
13148 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13149 .padding(1)
13150 .kernel_size(3, 3)
13151 .group_input_channels(23)
13152 .group_output_channels(output_channels)
13153 .iterations(1)
13154 .TestF32();
13155 }
13156 }
13157
13158 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_height_dilation) {
13159 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13160 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
13161 DeconvolutionOperatorTester()
13162 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13163 .padding(1)
13164 .kernel_size(3, 3)
13165 .dilation_height(dilation_height)
13166 .group_input_channels(23)
13167 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13168 .iterations(3)
13169 .TestF32();
13170 }
13171 }
13172
13173 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_width_dilation) {
13174 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13175 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
13176 DeconvolutionOperatorTester()
13177 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13178 .padding(1)
13179 .kernel_size(3, 3)
13180 .dilation_width(dilation_width)
13181 .group_input_channels(23)
13182 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13183 .iterations(3)
13184 .TestF32();
13185 }
13186 }
13187
13188 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_height_dilation_and_stride) {
13189 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13190 DeconvolutionOperatorTester()
13191 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13192 .padding(1)
13193 .kernel_size(3, 3)
13194 .dilation_height(3)
13195 .stride_height(2)
13196 .group_input_channels(23)
13197 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13198 .iterations(3)
13199 .TestF32();
13200 }
13201
13202 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_width_dilation_and_stride) {
13203 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13204 DeconvolutionOperatorTester()
13205 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13206 .padding(1)
13207 .kernel_size(3, 3)
13208 .dilation_width(3)
13209 .stride_width(2)
13210 .group_input_channels(23)
13211 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13212 .iterations(3)
13213 .TestF32();
13214 }
13215
13216 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_input_stride) {
13217 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13218 DeconvolutionOperatorTester()
13219 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13220 .padding(1)
13221 .kernel_size(3, 3)
13222 .group_input_channels(23)
13223 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13224 .input_pixel_stride(28)
13225 .iterations(3)
13226 .TestF32();
13227 }
13228
13229 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_output_stride) {
13230 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13231 DeconvolutionOperatorTester()
13232 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13233 .padding(1)
13234 .kernel_size(3, 3)
13235 .group_input_channels(23)
13236 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13237 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
13238 .iterations(3)
13239 .TestF32();
13240 }
13241
13242 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_qmin) {
13243 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13244 DeconvolutionOperatorTester()
13245 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13246 .padding(1)
13247 .kernel_size(3, 3)
13248 .group_input_channels(23)
13249 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13250 .qmin(128)
13251 .iterations(3)
13252 .TestF32();
13253 }
13254
13255 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_qmax) {
13256 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13257 DeconvolutionOperatorTester()
13258 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13259 .padding(1)
13260 .kernel_size(3, 3)
13261 .group_input_channels(23)
13262 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13263 .qmax(128)
13264 .iterations(3)
13265 .TestF32();
13266 }
13267
13268 TEST(DECONVOLUTION_NHWC_F32, 3x3_without_bias) {
13269 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13270 DeconvolutionOperatorTester()
13271 .has_bias(false)
13272 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13273 .padding(1)
13274 .kernel_size(3, 3)
13275 .group_input_channels(23)
13276 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13277 .iterations(3)
13278 .TestF32();
13279 }
13280
13281 TEST(DECONVOLUTION_NHWC_F32, weights_cache_3x3) {
13282 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13283 DeconvolutionOperatorTester()
13284 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13285 .padding(1)
13286 .kernel_size(3, 3)
13287 .group_input_channels(15)
13288 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13289 .use_weights_cache(true)
13290 .iterations(3)
13291 .TestF32();
13292 }
13293
13294 /**************************** CONV path, grouped ****************************/
13295
13296 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3) {
13297 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13298 DeconvolutionOperatorTester()
13299 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13300 .padding(1)
13301 .kernel_size(3, 3)
13302 .groups(2)
13303 .group_input_channels(15)
13304 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13305 .iterations(3)
13306 .TestF32();
13307 }
13308
13309 TEST(DECONVOLUTION_NHWC_F32, grouped_Kx3) {
13310 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13311 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13312 DeconvolutionOperatorTester()
13313 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13314 .padding_width(1)
13315 .kernel_size(kernel_height, 3)
13316 .groups(2)
13317 .group_input_channels(17)
13318 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13319 .iterations(3)
13320 .TestF32();
13321 }
13322 }
13323
13324 TEST(DECONVOLUTION_NHWC_F32, grouped_3xK) {
13325 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13326 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13327 DeconvolutionOperatorTester()
13328 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13329 .padding_height(1)
13330 .kernel_size(3, kernel_width)
13331 .groups(2)
13332 .group_input_channels(17)
13333 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13334 .iterations(3)
13335 .TestF32();
13336 }
13337 }
13338
13339 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_height_padding) {
13340 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13341 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13342 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13343 DeconvolutionOperatorTester()
13344 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13345 .padding_width(1)
13346 .padding_top(padding_top)
13347 .padding_bottom(padding_bottom)
13348 .kernel_size(3, 3)
13349 .groups(2)
13350 .group_input_channels(15)
13351 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13352 .iterations(1)
13353 .TestF32();
13354 }
13355 }
13356 }
13357
13358 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_width_padding) {
13359 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13360 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
13361 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
13362 DeconvolutionOperatorTester()
13363 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13364 .padding_height(1)
13365 .padding_left(padding_left)
13366 .padding_right(padding_right)
13367 .kernel_size(3, 3)
13368 .groups(2)
13369 .group_input_channels(15)
13370 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13371 .iterations(1)
13372 .TestF32();
13373 }
13374 }
13375 }
13376
13377 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_height_adjustment) {
13378 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13379 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
13380 DeconvolutionOperatorTester()
13381 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13382 .padding(1)
13383 .stride_height(adjustment_height + 1)
13384 .adjustment_height(adjustment_height)
13385 .kernel_size(3, 3)
13386 .groups(2)
13387 .group_input_channels(15)
13388 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13389 .iterations(1)
13390 .TestF32();
13391 }
13392 }
13393
13394 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_width_adjustment) {
13395 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13396 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
13397 DeconvolutionOperatorTester()
13398 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13399 .padding(1)
13400 .stride_width(adjustment_width + 1)
13401 .adjustment_width(adjustment_width)
13402 .kernel_size(3, 3)
13403 .groups(2)
13404 .group_input_channels(15)
13405 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13406 .iterations(1)
13407 .TestF32();
13408 }
13409 }
13410
13411 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_input_height) {
13412 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13413 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
13414 DeconvolutionOperatorTester()
13415 .input_size(input_height, kUnstridedInputWidth)
13416 .padding(1)
13417 .kernel_size(3, 3)
13418 .groups(2)
13419 .group_input_channels(15)
13420 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13421 .iterations(1)
13422 .TestF32();
13423 }
13424 }
13425
13426 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_input_width) {
13427 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13428 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
13429 DeconvolutionOperatorTester()
13430 .input_size(kUnstridedInputHeight, input_width)
13431 .padding(1)
13432 .kernel_size(3, 3)
13433 .groups(2)
13434 .group_input_channels(15)
13435 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13436 .iterations(1)
13437 .TestF32();
13438 }
13439 }
13440
13441 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_input_channels) {
13442 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13443 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
13444 DeconvolutionOperatorTester()
13445 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13446 .padding(1)
13447 .kernel_size(3, 3)
13448 .groups(2)
13449 .group_input_channels(input_channels)
13450 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13451 .iterations(1)
13452 .TestF32();
13453 }
13454 }
13455
13456 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_output_channels) {
13457 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13458 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
13459 DeconvolutionOperatorTester()
13460 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13461 .padding(1)
13462 .kernel_size(3, 3)
13463 .groups(2)
13464 .group_input_channels(23)
13465 .group_output_channels(output_channels)
13466 .iterations(1)
13467 .TestF32();
13468 }
13469 }
13470
13471 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_height_dilation) {
13472 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13473 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
13474 DeconvolutionOperatorTester()
13475 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13476 .padding(1)
13477 .kernel_size(3, 3)
13478 .dilation_height(dilation_height)
13479 .groups(2)
13480 .group_input_channels(23)
13481 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13482 .iterations(3)
13483 .TestF32();
13484 }
13485 }
13486
13487 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_width_dilation) {
13488 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13489 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
13490 DeconvolutionOperatorTester()
13491 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13492 .padding(1)
13493 .kernel_size(3, 3)
13494 .dilation_width(dilation_width)
13495 .groups(2)
13496 .group_input_channels(23)
13497 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13498 .iterations(3)
13499 .TestF32();
13500 }
13501 }
13502
13503 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_height_dilation_and_stride) {
13504 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13505 DeconvolutionOperatorTester()
13506 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13507 .padding(1)
13508 .kernel_size(3, 3)
13509 .dilation_height(3)
13510 .stride_height(2)
13511 .groups(2)
13512 .group_input_channels(23)
13513 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13514 .iterations(3)
13515 .TestF32();
13516 }
13517
13518 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_width_dilation_and_stride) {
13519 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13520 DeconvolutionOperatorTester()
13521 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13522 .padding(1)
13523 .kernel_size(3, 3)
13524 .dilation_width(3)
13525 .stride_width(2)
13526 .groups(2)
13527 .group_input_channels(23)
13528 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13529 .iterations(3)
13530 .TestF32();
13531 }
13532
13533 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_input_stride) {
13534 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13535 DeconvolutionOperatorTester()
13536 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13537 .padding(1)
13538 .kernel_size(3, 3)
13539 .groups(2)
13540 .group_input_channels(23)
13541 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13542 .input_pixel_stride(47)
13543 .iterations(3)
13544 .TestF32();
13545 }
13546
13547 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_output_stride) {
13548 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13549 DeconvolutionOperatorTester()
13550 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13551 .padding(1)
13552 .kernel_size(3, 3)
13553 .groups(2)
13554 .group_input_channels(23)
13555 .group_output_channels(xnn_params.f32.gemm.nr + 3)
13556 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
13557 .iterations(3)
13558 .TestF32();
13559 }
13560
13561 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_qmin) {
13562 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13563 DeconvolutionOperatorTester()
13564 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13565 .padding(1)
13566 .kernel_size(3, 3)
13567 .groups(2)
13568 .group_input_channels(23)
13569 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13570 .qmin(128)
13571 .iterations(3)
13572 .TestF32();
13573 }
13574
13575 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_qmax) {
13576 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13577 DeconvolutionOperatorTester()
13578 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13579 .padding(1)
13580 .kernel_size(3, 3)
13581 .groups(2)
13582 .group_input_channels(23)
13583 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13584 .qmax(128)
13585 .iterations(3)
13586 .TestF32();
13587 }
13588
13589 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_without_bias) {
13590 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13591 DeconvolutionOperatorTester()
13592 .has_bias(false)
13593 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13594 .padding(1)
13595 .kernel_size(3, 3)
13596 .groups(2)
13597 .group_input_channels(23)
13598 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13599 .iterations(3)
13600 .TestF32();
13601 }
13602
13603 TEST(DECONVOLUTION_NHWC_F32, weights_cache_grouped_3x3) {
13604 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13605 DeconvolutionOperatorTester()
13606 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13607 .padding(1)
13608 .kernel_size(3, 3)
13609 .groups(2)
13610 .group_input_channels(15)
13611 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13612 .use_weights_cache(true)
13613 .iterations(3)
13614 .TestF32();
13615 }
13616
13617 /**************************** CONV path, batched ****************************/
13618
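// Note (editorial, not from the original source): the tests below repeat the
// unit-stride CONV-path coverage with batch_size(2), so the only new aspect
// being exercised relative to the earlier tests is batching.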
13619 TEST(DECONVOLUTION_NHWC_F32, batched_3x3) {
13620 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13621 DeconvolutionOperatorTester()
13622 .batch_size(2)
13623 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13624 .padding(1)
13625 .kernel_size(3, 3)
13626 .group_input_channels(15)
13627 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13628 .iterations(3)
13629 .TestF32();
13630 }
13631
13632 TEST(DECONVOLUTION_NHWC_F32, batched_Kx3) {
13633 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13634 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13635 DeconvolutionOperatorTester()
13636 .batch_size(2)
13637 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13638 .padding_width(1)
13639 .kernel_size(kernel_height, 3)
13640 .group_input_channels(17)
13641 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13642 .iterations(3)
13643 .TestF32();
13644 }
13645 }
13646
13647 TEST(DECONVOLUTION_NHWC_F32, batched_3xK) {
13648 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13649 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13650 DeconvolutionOperatorTester()
13651 .batch_size(2)
13652 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13653 .padding_height(1)
13654 .kernel_size(3, kernel_width)
13655 .group_input_channels(17)
13656 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13657 .iterations(3)
13658 .TestF32();
13659 }
13660 }
13661
13662 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_height_padding) {
13663 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13664 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13665 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13666 DeconvolutionOperatorTester()
13667 .batch_size(2)
13668 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13669 .padding_width(1)
13670 .padding_top(padding_top)
13671 .padding_bottom(padding_bottom)
13672 .kernel_size(3, 3)
13673 .group_input_channels(15)
13674 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13675 .iterations(1)
13676 .TestF32();
13677 }
13678 }
13679 }
13680
13681 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_width_padding) {
13682 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13683 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
13684 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
13685 DeconvolutionOperatorTester()
13686 .batch_size(2)
13687 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13688 .padding_height(1)
13689 .padding_left(padding_left)
13690 .padding_right(padding_right)
13691 .kernel_size(3, 3)
13692 .group_input_channels(15)
13693 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13694 .iterations(1)
13695 .TestF32();
13696 }
13697 }
13698 }
13699
13700 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_height_adjustment) {
13701 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13702 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
13703 DeconvolutionOperatorTester()
13704 .batch_size(2)
13705 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13706 .padding(1)
13707 .stride_height(adjustment_height + 1)
13708 .adjustment_height(adjustment_height)
13709 .kernel_size(3, 3)
13710 .group_input_channels(15)
13711 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13712 .iterations(1)
13713 .TestF32();
13714 }
13715 }
13716
13717 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_width_adjustment) {
13718 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13719 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
13720 DeconvolutionOperatorTester()
13721 .batch_size(2)
13722 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13723 .padding(1)
13724 .stride_width(adjustment_width + 1)
13725 .adjustment_width(adjustment_width)
13726 .kernel_size(3, 3)
13727 .group_input_channels(15)
13728 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13729 .iterations(1)
13730 .TestF32();
13731 }
13732 }
13733
13734 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_input_height) {
13735 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13736 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
13737 DeconvolutionOperatorTester()
13738 .batch_size(2)
13739 .input_size(input_height, kUnstridedInputWidth)
13740 .padding(1)
13741 .kernel_size(3, 3)
13742 .group_input_channels(15)
13743 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13744 .iterations(1)
13745 .TestF32();
13746 }
13747 }
13748
13749 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_input_width) {
13750 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13751 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
13752 DeconvolutionOperatorTester()
13753 .batch_size(2)
13754 .input_size(kUnstridedInputHeight, input_width)
13755 .padding(1)
13756 .kernel_size(3, 3)
13757 .group_input_channels(15)
13758 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13759 .iterations(1)
13760 .TestF32();
13761 }
13762 }
13763
13764 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_input_channels) {
13765 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13766 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
13767 DeconvolutionOperatorTester()
13768 .batch_size(2)
13769 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13770 .padding(1)
13771 .kernel_size(3, 3)
13772 .group_input_channels(input_channels)
13773 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13774 .iterations(1)
13775 .TestF32();
13776 }
13777 }
13778
13779 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_output_channels) {
13780 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13781 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
13782 DeconvolutionOperatorTester()
13783 .batch_size(2)
13784 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13785 .padding(1)
13786 .kernel_size(3, 3)
13787 .group_input_channels(23)
13788 .group_output_channels(output_channels)
13789 .iterations(1)
13790 .TestF32();
13791 }
13792 }
13793
13794 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_height_dilation) {
13795 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13796 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
13797 DeconvolutionOperatorTester()
13798 .batch_size(2)
13799 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13800 .padding(1)
13801 .kernel_size(3, 3)
13802 .dilation_height(dilation_height)
13803 .group_input_channels(23)
13804 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13805 .iterations(3)
13806 .TestF32();
13807 }
13808 }
13809
13810 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_width_dilation) {
13811 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13812 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
13813 DeconvolutionOperatorTester()
13814 .batch_size(2)
13815 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13816 .padding(1)
13817 .kernel_size(3, 3)
13818 .dilation_width(dilation_width)
13819 .group_input_channels(23)
13820 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13821 .iterations(3)
13822 .TestF32();
13823 }
13824 }
13825
13826 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_height_dilation_and_stride) {
13827 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13828 DeconvolutionOperatorTester()
13829 .batch_size(2)
13830 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13831 .padding(1)
13832 .kernel_size(3, 3)
13833 .dilation_height(3)
13834 .stride_height(2)
13835 .group_input_channels(23)
13836 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13837 .iterations(3)
13838 .TestF32();
13839 }
13840
13841 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_width_dilation_and_stride) {
13842 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13843 DeconvolutionOperatorTester()
13844 .batch_size(2)
13845 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13846 .padding(1)
13847 .kernel_size(3, 3)
13848 .dilation_width(3)
13849 .stride_width(2)
13850 .group_input_channels(23)
13851 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13852 .iterations(3)
13853 .TestF32();
13854 }
13855
13856 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_input_stride) {
13857 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13858 DeconvolutionOperatorTester()
13859 .batch_size(2)
13860 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13861 .padding(1)
13862 .kernel_size(3, 3)
13863 .group_input_channels(23)
13864 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13865 .input_pixel_stride(28)
13866 .iterations(3)
13867 .TestF32();
13868 }
13869
13870 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_output_stride) {
13871 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13872 DeconvolutionOperatorTester()
13873 .batch_size(2)
13874 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13875 .padding(1)
13876 .kernel_size(3, 3)
13877 .group_input_channels(23)
13878 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13879 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
13880 .iterations(3)
13881 .TestF32();
13882 }
13883
13884 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_qmin) {
13885 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13886 DeconvolutionOperatorTester()
13887 .batch_size(2)
13888 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13889 .padding(1)
13890 .kernel_size(3, 3)
13891 .group_input_channels(23)
13892 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13893 .qmin(128)
13894 .iterations(3)
13895 .TestF32();
13896 }
13897
13898 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_qmax) {
13899 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13900 DeconvolutionOperatorTester()
13901 .batch_size(2)
13902 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13903 .padding(1)
13904 .kernel_size(3, 3)
13905 .group_input_channels(23)
13906 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13907 .qmax(128)
13908 .iterations(3)
13909 .TestF32();
13910 }
13911
13912 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_without_bias) {
13913 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13914 DeconvolutionOperatorTester()
13915 .has_bias(false)
13916 .batch_size(2)
13917 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13918 .padding(1)
13919 .kernel_size(3, 3)
13920 .group_input_channels(23)
13921 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13922 .iterations(3)
13923 .TestF32();
13924 }
13925
13926 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_3x3) {
13927 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13928 DeconvolutionOperatorTester()
13929 .batch_size(2)
13930 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13931 .padding(1)
13932 .kernel_size(3, 3)
13933 .group_input_channels(15)
13934 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13935 .use_weights_cache(true)
13936 .iterations(3)
13937 .TestF32();
13938 }
13939
13940
13941 /**************************** CONV path, grouped, batched ****************************/
13942
13943 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3) {
13944 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13945 DeconvolutionOperatorTester()
13946 .batch_size(2)
13947 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13948 .padding(1)
13949 .kernel_size(3, 3)
13950 .groups(2)
13951 .group_input_channels(15)
13952 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13953 .iterations(3)
13954 .TestF32();
13955 }
13956
13957 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_Kx3) {
13958 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13959 for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13960 DeconvolutionOperatorTester()
13961 .batch_size(2)
13962 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13963 .padding_width(1)
13964 .kernel_size(kernel_height, 3)
13965 .groups(2)
13966 .group_input_channels(17)
13967 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13968 .iterations(3)
13969 .TestF32();
13970 }
13971 }
13972
13973 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3xK) {
13974 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13975 for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13976 DeconvolutionOperatorTester()
13977 .batch_size(2)
13978 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13979 .padding_height(1)
13980 .kernel_size(3, kernel_width)
13981 .groups(2)
13982 .group_input_channels(17)
13983 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13984 .iterations(3)
13985 .TestF32();
13986 }
13987 }
13988
13989 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_height_padding) {
13990 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13991 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13992 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13993 DeconvolutionOperatorTester()
13994 .batch_size(2)
13995 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13996 .padding_width(1)
13997 .padding_top(padding_top)
13998 .padding_bottom(padding_bottom)
13999 .kernel_size(3, 3)
14000 .groups(2)
14001 .group_input_channels(15)
14002 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14003 .iterations(1)
14004 .TestF32();
14005 }
14006 }
14007 }
14008
14009 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_width_padding) {
14010 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14011 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
14012 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
14013 DeconvolutionOperatorTester()
14014 .batch_size(2)
14015 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14016 .padding_height(1)
14017 .padding_left(padding_left)
14018 .padding_right(padding_right)
14019 .kernel_size(3, 3)
14020 .groups(2)
14021 .group_input_channels(15)
14022 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14023 .iterations(1)
14024 .TestF32();
14025 }
14026 }
14027 }
14028
14029 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_height_adjustment) {
14030 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14031 for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
14032 DeconvolutionOperatorTester()
14033 .batch_size(2)
14034 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14035 .padding(1)
14036 .stride_height(adjustment_height + 1)
14037 .adjustment_height(adjustment_height)
14038 .kernel_size(3, 3)
14039 .groups(2)
14040 .group_input_channels(15)
14041 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14042 .iterations(1)
14043 .TestF32();
14044 }
14045 }
14046
14047 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_width_adjustment) {
14048 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14049 for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
14050 DeconvolutionOperatorTester()
14051 .batch_size(2)
14052 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14053 .padding(1)
14054 .stride_width(adjustment_width + 1)
14055 .adjustment_width(adjustment_width)
14056 .kernel_size(3, 3)
14057 .groups(2)
14058 .group_input_channels(15)
14059 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14060 .iterations(1)
14061 .TestF32();
14062 }
14063 }
14064
14065 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_input_height) {
14066 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14067 for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
14068 DeconvolutionOperatorTester()
14069 .batch_size(2)
14070 .input_size(input_height, kUnstridedInputWidth)
14071 .padding(1)
14072 .kernel_size(3, 3)
14073 .groups(2)
14074 .group_input_channels(15)
14075 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14076 .iterations(1)
14077 .TestF32();
14078 }
14079 }
14080
14081 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_input_width) {
14082 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14083 for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
14084 DeconvolutionOperatorTester()
14085 .batch_size(2)
14086 .input_size(kUnstridedInputHeight, input_width)
14087 .padding(1)
14088 .kernel_size(3, 3)
14089 .groups(2)
14090 .group_input_channels(15)
14091 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14092 .iterations(1)
14093 .TestF32();
14094 }
14095 }
14096
14097 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_input_channels) {
14098 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14099 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
14100 DeconvolutionOperatorTester()
14101 .batch_size(2)
14102 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14103 .padding(1)
14104 .kernel_size(3, 3)
14105 .groups(2)
14106 .group_input_channels(input_channels)
14107 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14108 .iterations(1)
14109 .TestF32();
14110 }
14111 }
14112
14113 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_output_channels) {
14114 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14115 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
14116 DeconvolutionOperatorTester()
14117 .batch_size(2)
14118 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14119 .padding(1)
14120 .kernel_size(3, 3)
14121 .groups(2)
14122 .group_input_channels(23)
14123 .group_output_channels(output_channels)
14124 .iterations(1)
14125 .TestF32();
14126 }
14127 }
14128
14129 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_height_dilation) {
14130 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14131 for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
14132 DeconvolutionOperatorTester()
14133 .batch_size(2)
14134 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14135 .padding(1)
14136 .kernel_size(3, 3)
14137 .dilation_height(dilation_height)
14138 .groups(2)
14139 .group_input_channels(23)
14140 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14141 .iterations(3)
14142 .TestF32();
14143 }
14144 }
14145
14146 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_width_dilation) {
14147 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14148 for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
14149 DeconvolutionOperatorTester()
14150 .batch_size(2)
14151 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14152 .padding(1)
14153 .kernel_size(3, 3)
14154 .dilation_width(dilation_width)
14155 .groups(2)
14156 .group_input_channels(23)
14157 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14158 .iterations(3)
14159 .TestF32();
14160 }
14161 }
14162
14163 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_height_dilation_and_stride) {
14164 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14165 DeconvolutionOperatorTester()
14166 .batch_size(2)
14167 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14168 .padding(1)
14169 .kernel_size(3, 3)
14170 .dilation_height(3)
14171 .stride_height(2)
14172 .groups(2)
14173 .group_input_channels(23)
14174 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14175 .iterations(3)
14176 .TestF32();
14177 }
14178
14179 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_width_dilation_and_stride) {
14180 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14181 DeconvolutionOperatorTester()
14182 .batch_size(2)
14183 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14184 .padding(1)
14185 .kernel_size(3, 3)
14186 .dilation_width(3)
14187 .stride_width(2)
14188 .groups(2)
14189 .group_input_channels(23)
14190 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14191 .iterations(3)
14192 .TestF32();
14193 }
14194
14195 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_input_stride) {
14196 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14197 DeconvolutionOperatorTester()
14198 .batch_size(2)
14199 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14200 .padding(1)
14201 .kernel_size(3, 3)
14202 .groups(2)
14203 .group_input_channels(23)
14204 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14205 .input_pixel_stride(47)
14206 .iterations(3)
14207 .TestF32();
14208 }
14209
14210 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_output_stride) {
14211 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14212 DeconvolutionOperatorTester()
14213 .batch_size(2)
14214 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14215 .padding(1)
14216 .kernel_size(3, 3)
14217 .groups(2)
14218 .group_input_channels(23)
14219 .group_output_channels(xnn_params.f32.gemm.nr + 3)
14220 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
14221 .iterations(3)
14222 .TestF32();
14223 }
14224
14225 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_qmin) {
14226 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14227 DeconvolutionOperatorTester()
14228 .batch_size(2)
14229 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14230 .padding(1)
14231 .kernel_size(3, 3)
14232 .groups(2)
14233 .group_input_channels(23)
14234 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14235 .qmin(128)
14236 .iterations(3)
14237 .TestF32();
14238 }
14239
14240 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_qmax) {
14241 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14242 DeconvolutionOperatorTester()
14243 .batch_size(2)
14244 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14245 .padding(1)
14246 .kernel_size(3, 3)
14247 .groups(2)
14248 .group_input_channels(23)
14249 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14250 .qmax(128)
14251 .iterations(3)
14252 .TestF32();
14253 }
14254
14255 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_without_bias) {
14256 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14257 DeconvolutionOperatorTester()
14258 .has_bias(false)
14259 .batch_size(2)
14260 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14261 .padding(1)
14262 .kernel_size(3, 3)
14263 .groups(2)
14264 .group_input_channels(23)
14265 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14266 .iterations(3)
14267 .TestF32();
14268 }
14269
14270 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_grouped_3x3) {
14271 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14272 DeconvolutionOperatorTester()
14273 .batch_size(2)
14274 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14275 .padding(1)
14276 .kernel_size(3, 3)
14277 .groups(2)
14278 .group_input_channels(15)
14279 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14280 .use_weights_cache(true)
14281 .iterations(3)
14282 .TestF32();
14283 }
14284
14285 /**************************** CONV path, setup ****************************/
14286
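// Note (editorial, not from the original source): the setup tests below build
// one operator, then change the batch size or spatial size via the next_*
// parameters and re-run it through TestSetupF32 to check that setup after a
// shape change still produces correct results.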
14287 TEST(DECONVOLUTION_NHWC_F32, 3x3_setup_changing_batch) {
14288 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14289 DeconvolutionOperatorTester()
14290 .batch_size(2)
14291 .next_batch_size(5)
14292 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14293 .kernel_height(3)
14294 .kernel_width(5)
14295 .groups(2)
14296 .group_input_channels(15)
14297 .group_output_channels(17)
14298 .TestSetupF32();
14299 }
14300
14301 TEST(DECONVOLUTION_NHWC_F32, 3x3_setup_changing_height) {
14302 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14303 DeconvolutionOperatorTester()
14304 .batch_size(2)
14305 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14306 .next_input_height(kUnstridedInputHeight + 3)
14307 .kernel_height(3)
14308 .kernel_width(5)
14309 .groups(2)
14310 .group_input_channels(15)
14311 .group_output_channels(17)
14312 .TestSetupF32();
14313 }
14314
14315 TEST(DECONVOLUTION_NHWC_F32, 3x3_setup_changing_width) {
14316 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14317 DeconvolutionOperatorTester()
14318 .batch_size(2)
14319 .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14320 .next_input_width(kUnstridedInputWidth + 3)
14321 .kernel_height(3)
14322 .kernel_width(5)
14323 .groups(2)
14324 .group_input_channels(15)
14325 .group_output_channels(17)
14326 .TestSetupF32();
14327 }
14328
14329 /**************************** SUBCONV2D/IGEMM path ****************************/
14330
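// Editorial sketch (an assumption for readability, not part of the tester or
// of any XNNPACK API): the strided tests below rely on the standard
// transposed-convolution size relation, which in one dimension is
//   output = (input - 1) * stride + dilation * (kernel - 1) + 1
//            + adjustment - total_padding
// A minimal hypothetical helper expressing that relation could look like:
//
//   constexpr size_t DeconvOutputDim(size_t input, size_t stride, size_t total_padding,
//                                    size_t dilation, size_t kernel, size_t adjustment) {
//     return (input - 1) * stride + dilation * (kernel - 1) + 1 + adjustment - total_padding;
//   }
//
// e.g. input = 6, stride = 2, kernel = 3, dilation = 1, total_padding = 2,
// adjustment = 0 gives an output extent of 11.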
14331 TEST(DECONVOLUTION_NHWC_F32, 3x3s2) {
14332 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14333 DeconvolutionOperatorTester()
14334 .input_size(kStridedInputHeight, kStridedInputWidth)
14335 .padding(1)
14336 .kernel_size(3, 3)
14337 .stride(2)
14338 .group_input_channels(15)
14339 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14340 .iterations(3)
14341 .TestF32();
14342 }
14343
14344 TEST(DECONVOLUTION_NHWC_F32, Kx3s2) {
14345 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14346 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
14347 DeconvolutionOperatorTester()
14348 .input_size(kStridedInputHeight, kStridedInputWidth)
14349 .padding_width(1)
14350 .kernel_size(kernel_height, 3)
14351 .stride(2)
14352 .group_input_channels(17)
14353 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14354 .iterations(3)
14355 .TestF32();
14356 }
14357 }
14358
14359 TEST(DECONVOLUTION_NHWC_F32, 3xKs2) {
14360 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14361 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
14362 DeconvolutionOperatorTester()
14363 .input_size(kStridedInputHeight, kStridedInputWidth)
14364 .padding_height(1)
14365 .kernel_size(3, kernel_width)
14366 .stride(2)
14367 .group_input_channels(17)
14368 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14369 .iterations(3)
14370 .TestF32();
14371 }
14372 }
14373
14374 TEST(DECONVOLUTION_NHWC_F32, 3x3sSx1) {
14375 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14376 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
14377 DeconvolutionOperatorTester()
14378 .input_size(kStridedInputHeight, kStridedInputWidth)
14379 .padding(1)
14380 .padding_width(1)
14381 .kernel_size(3, 3)
14382 .stride_height(stride_height)
14383 .group_input_channels(17)
14384 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14385 .iterations(3)
14386 .TestF32();
14387 }
14388 }
14389
14390 TEST(DECONVOLUTION_NHWC_F32, 3x3s1xS) {
14391 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14392 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
14393 DeconvolutionOperatorTester()
14394 .input_size(kStridedInputHeight, kStridedInputWidth)
14395 .padding(1)
14396 .padding_width(1)
14397 .kernel_size(3, 3)
14398 .stride_width(stride_width)
14399 .group_input_channels(17)
14400 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14401 .iterations(3)
14402 .TestF32();
14403 }
14404 }
14405
14406 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_height_padding) {
14407 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14408 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
14409 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
14410 DeconvolutionOperatorTester()
14411 .input_size(kStridedInputHeight, kStridedInputWidth)
14412 .padding_width(1)
14413 .padding_top(padding_top)
14414 .padding_bottom(padding_bottom)
14415 .kernel_size(3, 3)
14416 .stride(2)
14417 .group_input_channels(15)
14418 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14419 .iterations(1)
14420 .TestF32();
14421 }
14422 }
14423 }
14424
14425 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_width_padding) {
14426 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14427 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
14428 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
14429 DeconvolutionOperatorTester()
14430 .input_size(kStridedInputHeight, kStridedInputWidth)
14431 .padding_height(1)
14432 .padding_left(padding_left)
14433 .padding_right(padding_right)
14434 .kernel_size(3, 3)
14435 .stride(2)
14436 .group_input_channels(15)
14437 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14438 .iterations(1)
14439 .TestF32();
14440 }
14441 }
14442 }
14443
14444 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_height_adjustment) {
14445 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14446 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
14447 DeconvolutionOperatorTester()
14448 .input_size(kStridedInputHeight, kStridedInputWidth)
14449 .padding(1)
14450 .adjustment_height(adjustment_height)
14451 .kernel_size(3, 3)
14452 .stride(2)
14453 .group_input_channels(15)
14454 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14455 .iterations(1)
14456 .TestF32();
14457 }
14458 }
14459
14460 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_width_adjustment) {
14461 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14462 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
14463 DeconvolutionOperatorTester()
14464 .input_size(kStridedInputHeight, kStridedInputWidth)
14465 .padding(1)
14466 .adjustment_width(adjustment_width)
14467 .kernel_size(3, 3)
14468 .stride(2)
14469 .group_input_channels(15)
14470 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14471 .iterations(1)
14472 .TestF32();
14473 }
14474 }
14475
14476 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_input_height) {
14477 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14478 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
14479 DeconvolutionOperatorTester()
14480 .input_size(input_height, kStridedInputWidth)
14481 .padding(1)
14482 .kernel_size(3, 3)
14483 .stride(2)
14484 .group_input_channels(15)
14485 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14486 .iterations(1)
14487 .TestF32();
14488 }
14489 }
14490
14491 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_input_width) {
14492 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14493 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
14494 DeconvolutionOperatorTester()
14495 .input_size(kStridedInputHeight, input_width)
14496 .padding(1)
14497 .kernel_size(3, 3)
14498 .stride(2)
14499 .group_input_channels(15)
14500 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14501 .iterations(1)
14502 .TestF32();
14503 }
14504 }
14505
14506 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_input_channels) {
14507 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14508 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
14509 DeconvolutionOperatorTester()
14510 .input_size(kStridedInputHeight, kStridedInputWidth)
14511 .padding(1)
14512 .kernel_size(3, 3)
14513 .stride(2)
14514 .group_input_channels(input_channels)
14515 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14516 .iterations(1)
14517 .TestF32();
14518 }
14519 }
14520
14521 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_output_channels) {
14522 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14523 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
14524 DeconvolutionOperatorTester()
14525 .input_size(kStridedInputHeight, kStridedInputWidth)
14526 .padding(1)
14527 .kernel_size(3, 3)
14528 .stride(2)
14529 .group_input_channels(23)
14530 .group_output_channels(output_channels)
14531 .iterations(1)
14532 .TestF32();
14533 }
14534 }
14535
14536 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_input_stride) {
14537 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14538 DeconvolutionOperatorTester()
14539 .input_size(kStridedInputHeight, kStridedInputWidth)
14540 .padding(1)
14541 .kernel_size(3, 3)
14542 .stride(2)
14543 .group_input_channels(23)
14544 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14545 .input_pixel_stride(28)
14546 .iterations(3)
14547 .TestF32();
14548 }
14549
14550 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_output_stride) {
14551 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14552 DeconvolutionOperatorTester()
14553 .input_size(kStridedInputHeight, kStridedInputWidth)
14554 .padding(1)
14555 .kernel_size(3, 3)
14556 .stride(2)
14557 .group_input_channels(23)
14558 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14559 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
14560 .iterations(3)
14561 .TestF32();
14562 }
14563
14564 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_qmin) {
14565 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14566 DeconvolutionOperatorTester()
14567 .input_size(kStridedInputHeight, kStridedInputWidth)
14568 .padding(1)
14569 .kernel_size(3, 3)
14570 .stride(2)
14571 .group_input_channels(23)
14572 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14573 .qmin(128)
14574 .iterations(3)
14575 .TestF32();
14576 }
14577
14578 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_qmax) {
14579 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14580 DeconvolutionOperatorTester()
14581 .input_size(kStridedInputHeight, kStridedInputWidth)
14582 .padding(1)
14583 .kernel_size(3, 3)
14584 .stride(2)
14585 .group_input_channels(23)
14586 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14587 .qmax(128)
14588 .iterations(3)
14589 .TestF32();
14590 }
14591
14592 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_without_bias) {
14593 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14594 DeconvolutionOperatorTester()
14595 .has_bias(false)
14596 .input_size(kStridedInputHeight, kStridedInputWidth)
14597 .padding(1)
14598 .kernel_size(3, 3)
14599 .stride(2)
14600 .group_input_channels(23)
14601 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14602 .iterations(3)
14603 .TestF32();
14604 }
14605
14606 TEST(DECONVOLUTION_NHWC_F32, weights_cache_3x3s2) {
14607 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14608 DeconvolutionOperatorTester()
14609 .input_size(kStridedInputHeight, kStridedInputWidth)
14610 .padding(1)
14611 .kernel_size(3, 3)
14612 .stride(2)
14613 .group_input_channels(15)
14614 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14615 .use_weights_cache(true)
14616 .iterations(3)
14617 .TestF32();
14618 }
14619
14620 TEST(DECONVOLUTION_NHWC_F32, stress_weights_cache_5x5s4) {
14621 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14622 DeconvolutionOperatorTester()
14623 .input_size(kStridedInputHeight, kStridedInputWidth)
14624 .padding(1)
14625 .kernel_size(5, 5)
14626 .stride(4)
14627 .group_input_channels(15)
14628 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14629 .iterations(60) // Higher number of iterations to write more weights.
14630 .StressWeightsCacheTestF32();
14631 }
14632
14633 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
14634
14635 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2) {
14636 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14637 DeconvolutionOperatorTester()
14638 .input_size(kStridedInputHeight, kStridedInputWidth)
14639 .padding(1)
14640 .kernel_size(3, 3)
14641 .stride(2)
14642 .groups(2)
14643 .group_input_channels(17)
14644 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14645 .iterations(3)
14646 .TestF32();
14647 }
14648
14649 TEST(DECONVOLUTION_NHWC_F32, grouped_Kx3s2) {
14650 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14651 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
14652 DeconvolutionOperatorTester()
14653 .input_size(kStridedInputHeight, kStridedInputWidth)
14654 .padding_width(1)
14655 .kernel_size(kernel_height, 3)
14656 .stride(2)
14657 .groups(2)
14658 .group_input_channels(17)
14659 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14660 .iterations(3)
14661 .TestF32();
14662 }
14663 }
14664
14665 TEST(DECONVOLUTION_NHWC_F32, grouped_3xKs2) {
14666 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14667 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
14668 DeconvolutionOperatorTester()
14669 .input_size(kStridedInputHeight, kStridedInputWidth)
14670 .padding_height(1)
14671 .kernel_size(3, kernel_width)
14672 .stride(2)
14673 .groups(2)
14674 .group_input_channels(17)
14675 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14676 .iterations(3)
14677 .TestF32();
14678 }
14679 }
14680
14681 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3sSx1) {
14682 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14683 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
14684 DeconvolutionOperatorTester()
14685 .input_size(kStridedInputHeight, kStridedInputWidth)
14686 .padding(1)
14687 .padding_width(1)
14688 .kernel_size(3, 3)
14689 .stride_height(stride_height)
14690 .groups(2)
14691 .group_input_channels(17)
14692 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14693 .iterations(3)
14694 .TestF32();
14695 }
14696 }
14697
14698 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s1xS) {
14699 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14700 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
14701 DeconvolutionOperatorTester()
14702 .input_size(kStridedInputHeight, kStridedInputWidth)
14703 .padding(1)
14704 .padding_width(1)
14705 .kernel_size(3, 3)
14706 .stride_width(stride_width)
14707 .groups(2)
14708 .group_input_channels(17)
14709 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14710 .iterations(3)
14711 .TestF32();
14712 }
14713 }
14714
14715 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_height_padding) {
14716 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14717 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
14718 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
14719 DeconvolutionOperatorTester()
14720 .input_size(kStridedInputHeight, kStridedInputWidth)
14721 .padding_width(1)
14722 .padding_top(padding_top)
14723 .padding_bottom(padding_bottom)
14724 .kernel_size(3, 3)
14725 .stride(2)
14726 .groups(2)
14727 .group_input_channels(17)
14728 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14729 .iterations(1)
14730 .TestF32();
14731 }
14732 }
14733 }
14734
14735 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_width_padding) {
14736 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14737 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
14738 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
14739 DeconvolutionOperatorTester()
14740 .input_size(kStridedInputHeight, kStridedInputWidth)
14741 .padding_height(1)
14742 .padding_left(padding_left)
14743 .padding_right(padding_right)
14744 .kernel_size(3, 3)
14745 .stride(2)
14746 .groups(2)
14747 .group_input_channels(17)
14748 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14749 .iterations(1)
14750 .TestF32();
14751 }
14752 }
14753 }
14754
14755 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_height_adjustment) {
14756 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14757 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
14758 DeconvolutionOperatorTester()
14759 .input_size(kStridedInputHeight, kStridedInputWidth)
14760 .padding(1)
14761 .adjustment_height(adjustment_height)
14762 .kernel_size(3, 3)
14763 .stride(2)
14764 .groups(2)
14765 .group_input_channels(17)
14766 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14767 .iterations(1)
14768 .TestF32();
14769 }
14770 }
14771
14772 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_width_adjustment) {
14773 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14774 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
14775 DeconvolutionOperatorTester()
14776 .input_size(kStridedInputHeight, kStridedInputWidth)
14777 .padding(1)
14778 .adjustment_width(adjustment_width)
14779 .kernel_size(3, 3)
14780 .stride(2)
14781 .groups(2)
14782 .group_input_channels(17)
14783 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14784 .iterations(1)
14785 .TestF32();
14786 }
14787 }
14788
14789 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_input_height) {
14790 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14791 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
14792 DeconvolutionOperatorTester()
14793 .input_size(input_height, kStridedInputWidth)
14794 .padding(1)
14795 .kernel_size(3, 3)
14796 .stride(2)
14797 .groups(2)
14798 .group_input_channels(17)
14799 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14800 .iterations(1)
14801 .TestF32();
14802 }
14803 }
14804
14805 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_input_width) {
14806 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14807 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
14808 DeconvolutionOperatorTester()
14809 .input_size(kStridedInputHeight, input_width)
14810 .padding(1)
14811 .kernel_size(3, 3)
14812 .stride(2)
14813 .groups(2)
14814 .group_input_channels(17)
14815 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14816 .iterations(1)
14817 .TestF32();
14818 }
14819 }
14820
14821 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_input_channels) {
14822 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14823 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
14824 DeconvolutionOperatorTester()
14825 .input_size(kStridedInputHeight, kStridedInputWidth)
14826 .padding(1)
14827 .kernel_size(3, 3)
14828 .stride(2)
14829 .groups(2)
14830 .group_input_channels(input_channels)
14831 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14832 .iterations(1)
14833 .TestF32();
14834 }
14835 }
14836
14837 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_output_channels) {
14838 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14839 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
14840 DeconvolutionOperatorTester()
14841 .input_size(kStridedInputHeight, kStridedInputWidth)
14842 .padding(1)
14843 .kernel_size(3, 3)
14844 .stride(2)
14845 .groups(2)
14846 .group_input_channels(17)
14847 .group_output_channels(output_channels)
14848 .iterations(1)
14849 .TestF32();
14850 }
14851 }
14852
14853 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_input_stride) {
14854 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14855 DeconvolutionOperatorTester()
14856 .input_size(kStridedInputHeight, kStridedInputWidth)
14857 .padding(1)
14858 .kernel_size(3, 3)
14859 .stride(2)
14860 .groups(2)
14861 .group_input_channels(17)
14862 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14863 .input_pixel_stride(37)
14864 .iterations(3)
14865 .TestF32();
14866 }
14867
14868 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_output_stride) {
14869 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14870 DeconvolutionOperatorTester()
14871 .input_size(kStridedInputHeight, kStridedInputWidth)
14872 .padding(1)
14873 .kernel_size(3, 3)
14874 .stride(2)
14875 .groups(2)
14876 .group_input_channels(17)
14877 .group_output_channels(xnn_params.f32.gemm.nr + 3)
14878 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
14879 .iterations(3)
14880 .TestF32();
14881 }
14882
14883 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_qmin) {
14884 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14885 DeconvolutionOperatorTester()
14886 .input_size(kStridedInputHeight, kStridedInputWidth)
14887 .padding(1)
14888 .kernel_size(3, 3)
14889 .stride(2)
14890 .groups(2)
14891 .group_input_channels(17)
14892 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14893 .qmin(128)
14894 .iterations(3)
14895 .TestF32();
14896 }
14897
14898 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_qmax) {
14899 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14900 DeconvolutionOperatorTester()
14901 .input_size(kStridedInputHeight, kStridedInputWidth)
14902 .padding(1)
14903 .kernel_size(3, 3)
14904 .stride(2)
14905 .groups(2)
14906 .group_input_channels(17)
14907 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14908 .qmax(128)
14909 .iterations(3)
14910 .TestF32();
14911 }
14912
14913 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_without_bias) {
14914 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14915 DeconvolutionOperatorTester()
14916 .has_bias(false)
14917 .input_size(kStridedInputHeight, kStridedInputWidth)
14918 .padding(1)
14919 .kernel_size(3, 3)
14920 .stride(2)
14921 .groups(2)
14922 .group_input_channels(17)
14923 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14924 .iterations(3)
14925 .TestF32();
14926 }
14927
14928 TEST(DECONVOLUTION_NHWC_F32, weights_cache_grouped_3x3s2) {
14929 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14930 DeconvolutionOperatorTester()
14931 .input_size(kStridedInputHeight, kStridedInputWidth)
14932 .padding(1)
14933 .kernel_size(3, 3)
14934 .stride(2)
14935 .groups(2)
14936 .group_input_channels(17)
14937 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14938 .use_weights_cache(true)
14939 .iterations(3)
14940 .TestF32();
14941 }
14942
14943 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
14944
14945 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2) {
14946 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14947 DeconvolutionOperatorTester()
14948 .batch_size(2)
14949 .input_size(kStridedInputHeight, kStridedInputWidth)
14950 .padding(1)
14951 .kernel_size(3, 3)
14952 .stride(2)
14953 .group_input_channels(15)
14954 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14955 .iterations(3)
14956 .TestF32();
14957 }
14958
14959 TEST(DECONVOLUTION_NHWC_F32, batched_Kx3s2) {
14960 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14961 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
14962 DeconvolutionOperatorTester()
14963 .batch_size(2)
14964 .input_size(kStridedInputHeight, kStridedInputWidth)
14965 .padding_width(1)
14966 .kernel_size(kernel_height, 3)
14967 .stride(2)
14968 .group_input_channels(17)
14969 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14970 .iterations(3)
14971 .TestF32();
14972 }
14973 }
14974
14975 TEST(DECONVOLUTION_NHWC_F32, batched_3xKs2) {
14976 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14977 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
14978 DeconvolutionOperatorTester()
14979 .batch_size(2)
14980 .input_size(kStridedInputHeight, kStridedInputWidth)
14981 .padding_height(1)
14982 .kernel_size(3, kernel_width)
14983 .stride(2)
14984 .group_input_channels(17)
14985 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14986 .iterations(3)
14987 .TestF32();
14988 }
14989 }
14990
14991 TEST(DECONVOLUTION_NHWC_F32, batched_3x3sSx1) {
14992 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14993 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
14994 DeconvolutionOperatorTester()
14995 .batch_size(2)
14996 .input_size(kStridedInputHeight, kStridedInputWidth)
14997 .padding(1)
14998 .padding_width(1)
14999 .kernel_size(3, 3)
15000 .stride_height(stride_height)
15001 .group_input_channels(17)
15002 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15003 .iterations(3)
15004 .TestF32();
15005 }
15006 }
15007
15008 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s1xS) {
15009 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15010 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
15011 DeconvolutionOperatorTester()
15012 .batch_size(2)
15013 .input_size(kStridedInputHeight, kStridedInputWidth)
15014 .padding(1)
15015 .padding_width(1)
15016 .kernel_size(3, 3)
15017 .stride_width(stride_width)
15018 .group_input_channels(17)
15019 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15020 .iterations(3)
15021 .TestF32();
15022 }
15023 }
15024
15025 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_height_padding) {
15026 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15027 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
15028 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
15029 DeconvolutionOperatorTester()
15030 .batch_size(2)
15031 .input_size(kStridedInputHeight, kStridedInputWidth)
15032 .padding_width(1)
15033 .padding_top(padding_top)
15034 .padding_bottom(padding_bottom)
15035 .kernel_size(3, 3)
15036 .stride(2)
15037 .group_input_channels(15)
15038 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15039 .iterations(1)
15040 .TestF32();
15041 }
15042 }
15043 }
15044
15045 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_width_padding) {
15046 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15047 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
15048 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
15049 DeconvolutionOperatorTester()
15050 .batch_size(2)
15051 .input_size(kStridedInputHeight, kStridedInputWidth)
15052 .padding_height(1)
15053 .padding_left(padding_left)
15054 .padding_right(padding_right)
15055 .kernel_size(3, 3)
15056 .stride(2)
15057 .group_input_channels(15)
15058 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15059 .iterations(1)
15060 .TestF32();
15061 }
15062 }
15063 }
15064
15065 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_height_adjustment) {
15066 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15067 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
15068 DeconvolutionOperatorTester()
15069 .batch_size(2)
15070 .input_size(kStridedInputHeight, kStridedInputWidth)
15071 .padding(1)
15072 .adjustment_height(adjustment_height)
15073 .kernel_size(3, 3)
15074 .stride(2)
15075 .group_input_channels(15)
15076 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15077 .iterations(1)
15078 .TestF32();
15079 }
15080 }
15081
15082 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_width_adjustment) {
15083 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15084 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
15085 DeconvolutionOperatorTester()
15086 .batch_size(2)
15087 .input_size(kStridedInputHeight, kStridedInputWidth)
15088 .padding(1)
15089 .adjustment_width(adjustment_width)
15090 .kernel_size(3, 3)
15091 .stride(2)
15092 .group_input_channels(15)
15093 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15094 .iterations(1)
15095 .TestF32();
15096 }
15097 }
15098
15099 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_input_height) {
15100 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15101 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15102 DeconvolutionOperatorTester()
15103 .batch_size(2)
15104 .input_size(input_height, kStridedInputWidth)
15105 .padding(1)
15106 .kernel_size(3, 3)
15107 .stride(2)
15108 .group_input_channels(15)
15109 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15110 .iterations(1)
15111 .TestF32();
15112 }
15113 }
15114
15115 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_input_width) {
15116 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15117 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15118 DeconvolutionOperatorTester()
15119 .batch_size(2)
15120 .input_size(kStridedInputHeight, input_width)
15121 .padding(1)
15122 .kernel_size(3, 3)
15123 .stride(2)
15124 .group_input_channels(15)
15125 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15126 .iterations(1)
15127 .TestF32();
15128 }
15129 }
15130
15131 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_input_channels) {
15132 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15133 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
15134 DeconvolutionOperatorTester()
15135 .batch_size(2)
15136 .input_size(kStridedInputHeight, kStridedInputWidth)
15137 .padding(1)
15138 .kernel_size(3, 3)
15139 .stride(2)
15140 .group_input_channels(input_channels)
15141 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15142 .iterations(1)
15143 .TestF32();
15144 }
15145 }
15146
15147 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_output_channels) {
15148 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15149 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15150 DeconvolutionOperatorTester()
15151 .batch_size(2)
15152 .input_size(kStridedInputHeight, kStridedInputWidth)
15153 .padding(1)
15154 .kernel_size(3, 3)
15155 .stride(2)
15156 .group_input_channels(23)
15157 .group_output_channels(output_channels)
15158 .iterations(1)
15159 .TestF32();
15160 }
15161 }
15162
15163 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_input_stride) {
15164 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15165 DeconvolutionOperatorTester()
15166 .batch_size(2)
15167 .input_size(kStridedInputHeight, kStridedInputWidth)
15168 .padding(1)
15169 .kernel_size(3, 3)
15170 .stride(2)
15171 .group_input_channels(23)
15172 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15173 .input_pixel_stride(28)
15174 .iterations(3)
15175 .TestF32();
15176 }
15177
15178 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_output_stride) {
15179 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15180 DeconvolutionOperatorTester()
15181 .batch_size(2)
15182 .input_size(kStridedInputHeight, kStridedInputWidth)
15183 .padding(1)
15184 .kernel_size(3, 3)
15185 .stride(2)
15186 .group_input_channels(23)
15187 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15188 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15189 .iterations(3)
15190 .TestF32();
15191 }
15192
15193 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_qmin) {
15194 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15195 DeconvolutionOperatorTester()
15196 .batch_size(2)
15197 .input_size(kStridedInputHeight, kStridedInputWidth)
15198 .padding(1)
15199 .kernel_size(3, 3)
15200 .stride(2)
15201 .group_input_channels(23)
15202 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15203 .qmin(128)
15204 .iterations(3)
15205 .TestF32();
15206 }
15207
15208 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_qmax) {
15209 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15210 DeconvolutionOperatorTester()
15211 .batch_size(2)
15212 .input_size(kStridedInputHeight, kStridedInputWidth)
15213 .padding(1)
15214 .kernel_size(3, 3)
15215 .stride(2)
15216 .group_input_channels(23)
15217 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15218 .qmax(128)
15219 .iterations(3)
15220 .TestF32();
15221 }
15222
15223 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_without_bias) {
15224 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15225 DeconvolutionOperatorTester()
15226 .has_bias(false)
15227 .batch_size(2)
15228 .input_size(kStridedInputHeight, kStridedInputWidth)
15229 .padding(1)
15230 .kernel_size(3, 3)
15231 .stride(2)
15232 .group_input_channels(23)
15233 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15234 .iterations(3)
15235 .TestF32();
15236 }
15237
15238 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_3x3s2) {
15239 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15240 DeconvolutionOperatorTester()
15241 .batch_size(2)
15242 .input_size(kStridedInputHeight, kStridedInputWidth)
15243 .padding(1)
15244 .kernel_size(3, 3)
15245 .stride(2)
15246 .group_input_channels(15)
15247 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15248 .use_weights_cache(true)
15249 .iterations(3)
15250 .TestF32();
15251 }
15252
15253 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
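// These tests combine groups(2) with batch_size(2) on the strided 3x3 configurations above.
// With two groups the total input channel count is groups * group_input_channels (2 * 17 = 34),
// and the operator presumably packs weights per group, so this section checks that the
// subconvolution IGEMM path handles per-group packing and per-batch indirection together.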
15254
15255 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2) {
15256 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15257 DeconvolutionOperatorTester()
15258 .batch_size(2)
15259 .input_size(kStridedInputHeight, kStridedInputWidth)
15260 .padding(1)
15261 .kernel_size(3, 3)
15262 .stride(2)
15263 .groups(2)
15264 .group_input_channels(17)
15265 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15266 .iterations(3)
15267 .TestF32();
15268 }
15269
15270 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_Kx3s2) {
15271 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15272 for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
15273 DeconvolutionOperatorTester()
15274 .batch_size(2)
15275 .input_size(kStridedInputHeight, kStridedInputWidth)
15276 .padding_width(1)
15277 .kernel_size(kernel_height, 3)
15278 .stride(2)
15279 .groups(2)
15280 .group_input_channels(17)
15281 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15282 .iterations(3)
15283 .TestF32();
15284 }
15285 }
15286
15287 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3xKs2) {
15288 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15289 for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
15290 DeconvolutionOperatorTester()
15291 .batch_size(2)
15292 .input_size(kStridedInputHeight, kStridedInputWidth)
15293 .padding_height(1)
15294 .kernel_size(3, kernel_width)
15295 .stride(2)
15296 .groups(2)
15297 .group_input_channels(17)
15298 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15299 .iterations(3)
15300 .TestF32();
15301 }
15302 }
15303
15304 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3sSx1) {
15305 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15306 for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
15307 DeconvolutionOperatorTester()
15308 .batch_size(2)
15309 .input_size(kStridedInputHeight, kStridedInputWidth)
15310 .padding(1)
15311 .padding_width(1)
15312 .kernel_size(3, 3)
15313 .stride_height(stride_height)
15314 .groups(2)
15315 .group_input_channels(17)
15316 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15317 .iterations(3)
15318 .TestF32();
15319 }
15320 }
15321
15322 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s1xS) {
15323 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15324 for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
15325 DeconvolutionOperatorTester()
15326 .batch_size(2)
15327 .input_size(kStridedInputHeight, kStridedInputWidth)
15328 .padding(1)
15329 .padding_width(1)
15330 .kernel_size(3, 3)
15331 .stride_width(stride_width)
15332 .groups(2)
15333 .group_input_channels(17)
15334 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15335 .iterations(3)
15336 .TestF32();
15337 }
15338 }
15339
15340 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_height_padding) {
15341 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15342 for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
15343 for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
15344 DeconvolutionOperatorTester()
15345 .batch_size(2)
15346 .input_size(kStridedInputHeight, kStridedInputWidth)
15347 .padding_width(1)
15348 .padding_top(padding_top)
15349 .padding_bottom(padding_bottom)
15350 .kernel_size(3, 3)
15351 .stride(2)
15352 .groups(2)
15353 .group_input_channels(17)
15354 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15355 .iterations(1)
15356 .TestF32();
15357 }
15358 }
15359 }
15360
15361 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_width_padding) {
15362 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15363 for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
15364 for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
15365 DeconvolutionOperatorTester()
15366 .batch_size(2)
15367 .input_size(kStridedInputHeight, kStridedInputWidth)
15368 .padding_height(1)
15369 .padding_left(padding_left)
15370 .padding_right(padding_right)
15371 .kernel_size(3, 3)
15372 .stride(2)
15373 .groups(2)
15374 .group_input_channels(17)
15375 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15376 .iterations(1)
15377 .TestF32();
15378 }
15379 }
15380 }
15381
15382 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_height_adjustment) {
15383 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15384 for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
15385 DeconvolutionOperatorTester()
15386 .batch_size(2)
15387 .input_size(kStridedInputHeight, kStridedInputWidth)
15388 .padding(1)
15389 .adjustment_height(adjustment_height)
15390 .kernel_size(3, 3)
15391 .stride(2)
15392 .groups(2)
15393 .group_input_channels(17)
15394 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15395 .iterations(1)
15396 .TestF32();
15397 }
15398 }
15399
15400 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_width_adjustment) {
15401 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15402 for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
15403 DeconvolutionOperatorTester()
15404 .batch_size(2)
15405 .input_size(kStridedInputHeight, kStridedInputWidth)
15406 .padding(1)
15407 .adjustment_width(adjustment_width)
15408 .kernel_size(3, 3)
15409 .stride(2)
15410 .groups(2)
15411 .group_input_channels(17)
15412 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15413 .iterations(1)
15414 .TestF32();
15415 }
15416 }
15417
15418 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_input_height) {
15419 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15420 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15421 DeconvolutionOperatorTester()
15422 .batch_size(2)
15423 .input_size(input_height, kStridedInputWidth)
15424 .padding(1)
15425 .kernel_size(3, 3)
15426 .stride(2)
15427 .groups(2)
15428 .group_input_channels(17)
15429 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15430 .iterations(1)
15431 .TestF32();
15432 }
15433 }
15434
15435 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_input_width) {
15436 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15437 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15438 DeconvolutionOperatorTester()
15439 .batch_size(2)
15440 .input_size(kStridedInputHeight, input_width)
15441 .padding(1)
15442 .kernel_size(3, 3)
15443 .stride(2)
15444 .groups(2)
15445 .group_input_channels(17)
15446 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15447 .iterations(1)
15448 .TestF32();
15449 }
15450 }
15451
15452 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_input_channels) {
15453 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15454 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
15455 DeconvolutionOperatorTester()
15456 .batch_size(2)
15457 .input_size(kStridedInputHeight, kStridedInputWidth)
15458 .padding(1)
15459 .kernel_size(3, 3)
15460 .stride(2)
15461 .groups(2)
15462 .group_input_channels(input_channels)
15463 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15464 .iterations(1)
15465 .TestF32();
15466 }
15467 }
15468
15469 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_output_channels) {
15470 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15471 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15472 DeconvolutionOperatorTester()
15473 .batch_size(2)
15474 .input_size(kStridedInputHeight, kStridedInputWidth)
15475 .padding(1)
15476 .kernel_size(3, 3)
15477 .stride(2)
15478 .groups(2)
15479 .group_input_channels(17)
15480 .group_output_channels(output_channels)
15481 .iterations(1)
15482 .TestF32();
15483 }
15484 }
15485
15486 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_input_stride) {
15487 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15488 DeconvolutionOperatorTester()
15489 .batch_size(2)
15490 .input_size(kStridedInputHeight, kStridedInputWidth)
15491 .padding(1)
15492 .kernel_size(3, 3)
15493 .stride(2)
15494 .groups(2)
15495 .group_input_channels(17)
15496 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15497 .input_pixel_stride(37)
15498 .iterations(3)
15499 .TestF32();
15500 }
15501
15502 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_output_stride) {
15503 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15504 DeconvolutionOperatorTester()
15505 .batch_size(2)
15506 .input_size(kStridedInputHeight, kStridedInputWidth)
15507 .padding(1)
15508 .kernel_size(3, 3)
15509 .stride(2)
15510 .groups(2)
15511 .group_input_channels(17)
15512 .group_output_channels(xnn_params.f32.gemm.nr + 3)
15513 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15514 .iterations(3)
15515 .TestF32();
15516 }
15517
15518 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_qmin) {
15519 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15520 DeconvolutionOperatorTester()
15521 .batch_size(2)
15522 .input_size(kStridedInputHeight, kStridedInputWidth)
15523 .padding(1)
15524 .kernel_size(3, 3)
15525 .stride(2)
15526 .groups(2)
15527 .group_input_channels(17)
15528 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15529 .qmin(128)
15530 .iterations(3)
15531 .TestF32();
15532 }
15533
15534 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_qmax) {
15535 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15536 DeconvolutionOperatorTester()
15537 .batch_size(2)
15538 .input_size(kStridedInputHeight, kStridedInputWidth)
15539 .padding(1)
15540 .kernel_size(3, 3)
15541 .stride(2)
15542 .groups(2)
15543 .group_input_channels(17)
15544 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15545 .qmax(128)
15546 .iterations(3)
15547 .TestF32();
15548 }
15549
15550 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_without_bias) {
15551 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15552 DeconvolutionOperatorTester()
15553 .has_bias(false)
15554 .batch_size(2)
15555 .input_size(kStridedInputHeight, kStridedInputWidth)
15556 .padding(1)
15557 .kernel_size(3, 3)
15558 .stride(2)
15559 .groups(2)
15560 .group_input_channels(17)
15561 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15562 .iterations(3)
15563 .TestF32();
15564 }
15565
15566 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_grouped_3x3s2) {
15567 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15568 DeconvolutionOperatorTester()
15569 .batch_size(2)
15570 .input_size(kStridedInputHeight, kStridedInputWidth)
15571 .padding(1)
15572 .kernel_size(3, 3)
15573 .stride(2)
15574 .groups(2)
15575 .group_input_channels(17)
15576 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15577 .use_weights_cache(true)
15578 .iterations(3)
15579 .TestF32();
15580 }
15581
15582 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
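// The setup tests below build one operator, then change a single dimension between runs via
// next_batch_size() / next_input_height() / next_input_width(). TestSetupF32() presumably
// re-runs setup on the existing operator with the new shape and re-verifies the output, so
// what is exercised here is resizing on the strided 3x3 path rather than fresh creation.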
15583
15584 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_setup_changing_batch) {
15585 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15586 DeconvolutionOperatorTester()
15587 .batch_size(2)
15588 .next_batch_size(5)
15589 .input_size(kStridedInputHeight, kStridedInputWidth)
15590 .kernel_size(3, 3)
15591 .stride(2)
15592 .groups(2)
15593 .group_input_channels(15)
15594 .group_output_channels(17)
15595 .TestSetupF32();
15596 }
15597
15598 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_setup_changing_height) {
15599 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15600 DeconvolutionOperatorTester()
15601 .batch_size(2)
15602 .input_size(kStridedInputHeight, kStridedInputWidth)
15603 .next_input_height(kStridedInputHeight + 3)
15604 .kernel_size(3, 3)
15605 .stride(2)
15606 .groups(2)
15607 .group_input_channels(15)
15608 .group_output_channels(17)
15609 .TestSetupF32();
15610 }
15611
15612 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_setup_changing_width) {
15613 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15614 DeconvolutionOperatorTester()
15615 .batch_size(2)
15616 .input_size(kStridedInputHeight, kStridedInputWidth)
15617 .next_input_width(kStridedInputWidth + 3)
15618 .kernel_size(3, 3)
15619 .stride(2)
15620 .groups(2)
15621 .group_input_channels(15)
15622 .group_output_channels(17)
15623 .TestSetupF32();
15624 }
15625
15626 /**************************** SUBCONV2D/GEMM path ****************************/
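// In the 2x2-stride-2 cases below the kernel extent equals the stride in both dimensions, so
// each output pixel receives a contribution from exactly one input pixel. Every subconvolution
// then degenerates to a 1x1 kernel, which presumably lets this path run as a plain GEMM without
// an indirection buffer. With no padding or adjustment the output size is simply
//   output = (input - 1) * stride + kernel = (input - 1) * 2 + 2 = 2 * input,
// e.g. the 6x5 input (kStridedInputHeight x kStridedInputWidth) maps to a 12x10 output.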
15627
15628 TEST(DECONVOLUTION_NHWC_F32, 2x2s2) {
15629 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15630 DeconvolutionOperatorTester()
15631 .input_size(kStridedInputHeight, kStridedInputWidth)
15632 .kernel_size(2, 2)
15633 .stride(2)
15634 .group_input_channels(15)
15635 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15636 .iterations(3)
15637 .TestF32();
15638 }
15639
15640 TEST(DECONVOLUTION_NHWC_F32, Kx2sKx2) {
15641 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15642 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
15643 DeconvolutionOperatorTester()
15644 .input_size(kStridedInputHeight, kStridedInputWidth)
15645 .kernel_size(kernel_height, 2)
15646 .stride(kernel_height, 2)
15647 .group_input_channels(17)
15648 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15649 .iterations(3)
15650 .TestF32();
15651 }
15652 }
15653
15654 TEST(DECONVOLUTION_NHWC_F32, 2xKs2xK) {
15655 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15656 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
15657 DeconvolutionOperatorTester()
15658 .input_size(kStridedInputHeight, kStridedInputWidth)
15659 .kernel_size(2, kernel_width)
15660 .stride(2, kernel_width)
15661 .group_input_channels(17)
15662 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15663 .iterations(3)
15664 .TestF32();
15665 }
15666 }
15667
15668 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_height_adjustment) {
15669 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15670 DeconvolutionOperatorTester()
15671 .input_size(kStridedInputHeight, kStridedInputWidth)
15672 .adjustment_height(1)
15673 .kernel_size(2, 2)
15674 .stride(2)
15675 .group_input_channels(15)
15676 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15677 .iterations(1)
15678 .TestF32();
15679 }
15680
15681 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_width_adjustment) {
15682 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15683 DeconvolutionOperatorTester()
15684 .input_size(kStridedInputHeight, kStridedInputWidth)
15685 .adjustment_width(1)
15686 .kernel_size(2, 2)
15687 .stride(2)
15688 .group_input_channels(15)
15689 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15690 .iterations(1)
15691 .TestF32();
15692 }
15693
15694 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_input_height) {
15695 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15696 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15697 DeconvolutionOperatorTester()
15698 .input_size(input_height, kStridedInputWidth)
15699 .kernel_size(2, 2)
15700 .stride(2)
15701 .group_input_channels(15)
15702 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15703 .iterations(1)
15704 .TestF32();
15705 }
15706 }
15707
15708 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_input_width) {
15709 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15710 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15711 DeconvolutionOperatorTester()
15712 .input_size(kStridedInputHeight, input_width)
15713 .kernel_size(2, 2)
15714 .stride(2)
15715 .group_input_channels(15)
15716 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15717 .iterations(1)
15718 .TestF32();
15719 }
15720 }
15721
15722 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_input_channels) {
15723 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15724 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
15725 DeconvolutionOperatorTester()
15726 .input_size(kStridedInputHeight, kStridedInputWidth)
15727 .kernel_size(2, 2)
15728 .stride(2)
15729 .group_input_channels(input_channels)
15730 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15731 .iterations(1)
15732 .TestF32();
15733 }
15734 }
15735
15736 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_output_channels) {
15737 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15738 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15739 DeconvolutionOperatorTester()
15740 .input_size(kStridedInputHeight, kStridedInputWidth)
15741 .kernel_size(2, 2)
15742 .stride(2)
15743 .group_input_channels(23)
15744 .group_output_channels(output_channels)
15745 .iterations(1)
15746 .TestF32();
15747 }
15748 }
15749
15750 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_input_stride) {
15751 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15752 DeconvolutionOperatorTester()
15753 .input_size(kStridedInputHeight, kStridedInputWidth)
15754 .kernel_size(2, 2)
15755 .stride(2)
15756 .group_input_channels(23)
15757 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15758 .input_pixel_stride(28)
15759 .iterations(3)
15760 .TestF32();
15761 }
15762
15763 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_output_stride) {
15764 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15765 DeconvolutionOperatorTester()
15766 .input_size(kStridedInputHeight, kStridedInputWidth)
15767 .kernel_size(2, 2)
15768 .stride(2)
15769 .group_input_channels(23)
15770 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15771 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15772 .iterations(3)
15773 .TestF32();
15774 }
15775
15776 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_qmin) {
15777 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15778 DeconvolutionOperatorTester()
15779 .input_size(kStridedInputHeight, kStridedInputWidth)
15780 .kernel_size(2, 2)
15781 .stride(2)
15782 .group_input_channels(23)
15783 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15784 .qmin(128)
15785 .iterations(3)
15786 .TestF32();
15787 }
15788
15789 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_qmax) {
15790 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15791 DeconvolutionOperatorTester()
15792 .input_size(kStridedInputHeight, kStridedInputWidth)
15793 .kernel_size(2, 2)
15794 .stride(2)
15795 .group_input_channels(23)
15796 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15797 .qmax(128)
15798 .iterations(3)
15799 .TestF32();
15800 }
15801
15802 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_without_bias) {
15803 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15804 DeconvolutionOperatorTester()
15805 .has_bias(false)
15806 .input_size(kStridedInputHeight, kStridedInputWidth)
15807 .kernel_size(2, 2)
15808 .stride(2)
15809 .group_input_channels(23)
15810 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15811 .iterations(3)
15812 .TestF32();
15813 }
15814
15815 TEST(DECONVOLUTION_NHWC_F32, weights_cache_2x2s2) {
15816 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15817 DeconvolutionOperatorTester()
15818 .input_size(kStridedInputHeight, kStridedInputWidth)
15819 .kernel_size(2, 2)
15820 .stride(2)
15821 .group_input_channels(15)
15822 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15823 .use_weights_cache(true)
15824 .iterations(3)
15825 .TestF32();
15826 }
15827
15828 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
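// Same 2x2s2 (kernel == stride) coverage as above, but with groups(2), so the per-group weight
// packing on the GEMM-based subconvolution path is presumably exercised as well.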
15829
15830 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2) {
15831 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15832 DeconvolutionOperatorTester()
15833 .input_size(kStridedInputHeight, kStridedInputWidth)
15834 .kernel_size(2, 2)
15835 .stride(2)
15836 .groups(2)
15837 .group_input_channels(17)
15838 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15839 .iterations(3)
15840 .TestF32();
15841 }
15842
15843 TEST(DECONVOLUTION_NHWC_F32, grouped_Kx2sKx2) {
15844 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15845 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
15846 DeconvolutionOperatorTester()
15847 .input_size(kStridedInputHeight, kStridedInputWidth)
15848 .kernel_size(kernel_height, 2)
15849 .stride(kernel_height, 2)
15850 .groups(2)
15851 .group_input_channels(17)
15852 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15853 .iterations(3)
15854 .TestF32();
15855 }
15856 }
15857
15858 TEST(DECONVOLUTION_NHWC_F32, grouped_2xKs2xK) {
15859 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15860 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
15861 DeconvolutionOperatorTester()
15862 .input_size(kStridedInputHeight, kStridedInputWidth)
15863 .kernel_size(2, kernel_width)
15864 .stride(2, kernel_width)
15865 .groups(2)
15866 .group_input_channels(17)
15867 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15868 .iterations(3)
15869 .TestF32();
15870 }
15871 }
15872
15873 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_height_adjustment) {
15874 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15875 DeconvolutionOperatorTester()
15876 .input_size(kStridedInputHeight, kStridedInputWidth)
15877 .adjustment_height(1)
15878 .kernel_size(2, 2)
15879 .stride(2)
15880 .groups(2)
15881 .group_input_channels(17)
15882 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15883 .iterations(1)
15884 .TestF32();
15885 }
15886
15887 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_width_adjustment) {
15888 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15889 DeconvolutionOperatorTester()
15890 .input_size(kStridedInputHeight, kStridedInputWidth)
15891 .adjustment_width(1)
15892 .kernel_size(2, 2)
15893 .stride(2)
15894 .groups(2)
15895 .group_input_channels(17)
15896 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15897 .iterations(1)
15898 .TestF32();
15899 }
15900
15901 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_input_height) {
15902 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15903 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15904 DeconvolutionOperatorTester()
15905 .input_size(input_height, kStridedInputWidth)
15906 .kernel_size(2, 2)
15907 .stride(2)
15908 .groups(2)
15909 .group_input_channels(17)
15910 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15911 .iterations(1)
15912 .TestF32();
15913 }
15914 }
15915
15916 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_input_width) {
15917 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15918 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15919 DeconvolutionOperatorTester()
15920 .input_size(kStridedInputHeight, input_width)
15921 .kernel_size(2, 2)
15922 .stride(2)
15923 .groups(2)
15924 .group_input_channels(17)
15925 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15926 .iterations(1)
15927 .TestF32();
15928 }
15929 }
15930
15931 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_input_channels) {
15932 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15933 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
15934 DeconvolutionOperatorTester()
15935 .input_size(kStridedInputHeight, kStridedInputWidth)
15936 .kernel_size(2, 2)
15937 .stride(2)
15938 .groups(2)
15939 .group_input_channels(input_channels)
15940 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15941 .iterations(1)
15942 .TestF32();
15943 }
15944 }
15945
15946 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_output_channels) {
15947 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15948 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15949 DeconvolutionOperatorTester()
15950 .input_size(kStridedInputHeight, kStridedInputWidth)
15951 .kernel_size(2, 2)
15952 .stride(2)
15953 .groups(2)
15954 .group_input_channels(17)
15955 .group_output_channels(output_channels)
15956 .iterations(1)
15957 .TestF32();
15958 }
15959 }
15960
15961 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_input_stride) {
15962 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15963 DeconvolutionOperatorTester()
15964 .input_size(kStridedInputHeight, kStridedInputWidth)
15965 .kernel_size(2, 2)
15966 .stride(2)
15967 .groups(2)
15968 .group_input_channels(17)
15969 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15970 .input_pixel_stride(37)
15971 .iterations(3)
15972 .TestF32();
15973 }
15974
15975 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_output_stride) {
15976 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15977 DeconvolutionOperatorTester()
15978 .input_size(kStridedInputHeight, kStridedInputWidth)
15979 .kernel_size(2, 2)
15980 .stride(2)
15981 .groups(2)
15982 .group_input_channels(17)
15983 .group_output_channels(xnn_params.f32.gemm.nr + 3)
15984 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15985 .iterations(3)
15986 .TestF32();
15987 }
15988
15989 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_qmin) {
15990 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15991 DeconvolutionOperatorTester()
15992 .input_size(kStridedInputHeight, kStridedInputWidth)
15993 .kernel_size(2, 2)
15994 .stride(2)
15995 .groups(2)
15996 .group_input_channels(17)
15997 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15998 .qmin(128)
15999 .iterations(3)
16000 .TestF32();
16001 }
16002
16003 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_qmax) {
16004 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16005 DeconvolutionOperatorTester()
16006 .input_size(kStridedInputHeight, kStridedInputWidth)
16007 .kernel_size(2, 2)
16008 .stride(2)
16009 .groups(2)
16010 .group_input_channels(17)
16011 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16012 .qmax(128)
16013 .iterations(3)
16014 .TestF32();
16015 }
16016
16017 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_without_bias) {
16018 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16019 DeconvolutionOperatorTester()
16020 .has_bias(false)
16021 .input_size(kStridedInputHeight, kStridedInputWidth)
16022 .kernel_size(2, 2)
16023 .stride(2)
16024 .groups(2)
16025 .group_input_channels(17)
16026 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16027 .iterations(3)
16028 .TestF32();
16029 }
16030
16031 TEST(DECONVOLUTION_NHWC_F32, weights_cache_grouped_2x2s2) {
16032 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16033 DeconvolutionOperatorTester()
16034 .input_size(kStridedInputHeight, kStridedInputWidth)
16035 .kernel_size(2, 2)
16036 .stride(2)
16037 .groups(2)
16038 .group_input_channels(17)
16039 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16040 .use_weights_cache(true)
16041 .iterations(3)
16042 .TestF32();
16043 }
16044
16045 /**************************** SUBCONV2D/GEMM path, batched ****************************/
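// The 2x2s2 coverage repeated with batch_size(2), so the GEMM-based subconvolution path is
// checked across more than one image in a single run.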
16046
16047 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2) {
16048 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16049 DeconvolutionOperatorTester()
16050 .batch_size(2)
16051 .input_size(kStridedInputHeight, kStridedInputWidth)
16052 .kernel_size(2, 2)
16053 .stride(2)
16054 .group_input_channels(15)
16055 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16056 .iterations(3)
16057 .TestF32();
16058 }
16059
16060 TEST(DECONVOLUTION_NHWC_F32, batched_Kx2sKx2) {
16061 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16062 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
16063 DeconvolutionOperatorTester()
16064 .batch_size(2)
16065 .input_size(kStridedInputHeight, kStridedInputWidth)
16066 .kernel_size(kernel_height, 2)
16067 .stride(kernel_height, 2)
16068 .group_input_channels(17)
16069 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16070 .iterations(3)
16071 .TestF32();
16072 }
16073 }
16074
16075 TEST(DECONVOLUTION_NHWC_F32, batched_2xKs2xK) {
16076 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16077 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
16078 DeconvolutionOperatorTester()
16079 .batch_size(2)
16080 .input_size(kStridedInputHeight, kStridedInputWidth)
16081 .kernel_size(2, kernel_width)
16082 .stride(2, kernel_width)
16083 .group_input_channels(17)
16084 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16085 .iterations(3)
16086 .TestF32();
16087 }
16088 }
16089
16090 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_height_adjustment) {
16091 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16092 DeconvolutionOperatorTester()
16093 .batch_size(2)
16094 .input_size(kStridedInputHeight, kStridedInputWidth)
16095 .adjustment_height(1)
16096 .kernel_size(2, 2)
16097 .stride(2)
16098 .group_input_channels(15)
16099 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16100 .iterations(1)
16101 .TestF32();
16102 }
16103
16104 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_width_adjustment) {
16105 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16106 DeconvolutionOperatorTester()
16107 .batch_size(2)
16108 .input_size(kStridedInputHeight, kStridedInputWidth)
16109 .adjustment_width(1)
16110 .kernel_size(2, 2)
16111 .stride(2)
16112 .group_input_channels(15)
16113 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16114 .iterations(1)
16115 .TestF32();
16116 }
16117
16118 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_input_height) {
16119 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16120 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
16121 DeconvolutionOperatorTester()
16122 .batch_size(2)
16123 .input_size(input_height, kStridedInputWidth)
16124 .kernel_size(2, 2)
16125 .stride(2)
16126 .group_input_channels(15)
16127 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16128 .iterations(1)
16129 .TestF32();
16130 }
16131 }
16132
16133 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_input_width) {
16134 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16135 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
16136 DeconvolutionOperatorTester()
16137 .batch_size(2)
16138 .input_size(kStridedInputHeight, input_width)
16139 .kernel_size(2, 2)
16140 .stride(2)
16141 .group_input_channels(15)
16142 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16143 .iterations(1)
16144 .TestF32();
16145 }
16146 }
16147
16148 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_input_channels) {
16149 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16150 for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
16151 DeconvolutionOperatorTester()
16152 .batch_size(2)
16153 .input_size(kStridedInputHeight, kStridedInputWidth)
16154 .kernel_size(2, 2)
16155 .stride(2)
16156 .group_input_channels(input_channels)
16157 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16158 .iterations(1)
16159 .TestF32();
16160 }
16161 }
16162
16163 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_output_channels) {
16164 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16165 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
16166 DeconvolutionOperatorTester()
16167 .batch_size(2)
16168 .input_size(kStridedInputHeight, kStridedInputWidth)
16169 .kernel_size(2, 2)
16170 .stride(2)
16171 .group_input_channels(23)
16172 .group_output_channels(output_channels)
16173 .iterations(1)
16174 .TestF32();
16175 }
16176 }
16177
16178 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_input_stride) {
16179 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16180 DeconvolutionOperatorTester()
16181 .batch_size(2)
16182 .input_size(kStridedInputHeight, kStridedInputWidth)
16183 .kernel_size(2, 2)
16184 .stride(2)
16185 .group_input_channels(23)
16186 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16187 .input_pixel_stride(28)
16188 .iterations(3)
16189 .TestF32();
16190 }
16191
16192 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_output_stride) {
16193 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16194 DeconvolutionOperatorTester()
16195 .batch_size(2)
16196 .input_size(kStridedInputHeight, kStridedInputWidth)
16197 .kernel_size(2, 2)
16198 .stride(2)
16199 .group_input_channels(23)
16200 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16201 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
16202 .iterations(3)
16203 .TestF32();
16204 }
16205
16206 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_qmin) {
16207 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16208 DeconvolutionOperatorTester()
16209 .batch_size(2)
16210 .input_size(kStridedInputHeight, kStridedInputWidth)
16211 .kernel_size(2, 2)
16212 .stride(2)
16213 .group_input_channels(23)
16214 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16215 .qmin(128)
16216 .iterations(3)
16217 .TestF32();
16218 }
16219
16220 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_qmax) {
16221 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16222 DeconvolutionOperatorTester()
16223 .batch_size(2)
16224 .input_size(kStridedInputHeight, kStridedInputWidth)
16225 .kernel_size(2, 2)
16226 .stride(2)
16227 .group_input_channels(23)
16228 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16229 .qmax(128)
16230 .iterations(3)
16231 .TestF32();
16232 }
16233
16234 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_without_bias) {
16235 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16236 DeconvolutionOperatorTester()
16237 .has_bias(false)
16238 .batch_size(2)
16239 .input_size(kStridedInputHeight, kStridedInputWidth)
16240 .kernel_size(2, 2)
16241 .stride(2)
16242 .group_input_channels(23)
16243 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16244 .iterations(3)
16245 .TestF32();
16246 }
16247
16248 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_2x2s2) {
16249 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16250 DeconvolutionOperatorTester()
16251 .batch_size(2)
16252 .input_size(kStridedInputHeight, kStridedInputWidth)
16253 .kernel_size(2, 2)
16254 .stride(2)
16255 .group_input_channels(15)
16256 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16257 .use_weights_cache(true)
16258 .iterations(3)
16259 .TestF32();
16260 }
16261
16262 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
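// Grouping and batching combined on the 2x2s2 configurations, mirroring the grouped, batched
// coverage of the IGEMM-based subconvolution path above.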
16263
16264 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2) {
16265 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16266 DeconvolutionOperatorTester()
16267 .batch_size(2)
16268 .input_size(kStridedInputHeight, kStridedInputWidth)
16269 .kernel_size(2, 2)
16270 .stride(2)
16271 .groups(2)
16272 .group_input_channels(17)
16273 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16274 .iterations(3)
16275 .TestF32();
16276 }
16277
16278 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_Kx2sKx2) {
16279 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16280 for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
16281 DeconvolutionOperatorTester()
16282 .batch_size(2)
16283 .input_size(kStridedInputHeight, kStridedInputWidth)
16284 .kernel_size(kernel_height, 2)
16285 .stride(kernel_height, 2)
16286 .groups(2)
16287 .group_input_channels(17)
16288 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16289 .iterations(3)
16290 .TestF32();
16291 }
16292 }
16293
16294 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2xKs2xK) {
16295 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16296 for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
16297 DeconvolutionOperatorTester()
16298 .batch_size(2)
16299 .input_size(kStridedInputHeight, kStridedInputWidth)
16300 .kernel_size(2, kernel_width)
16301 .stride(2, kernel_width)
16302 .groups(2)
16303 .group_input_channels(17)
16304 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16305 .iterations(3)
16306 .TestF32();
16307 }
16308 }
16309
16310 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_height_adjustment) {
16311 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16312 DeconvolutionOperatorTester()
16313 .batch_size(2)
16314 .input_size(kStridedInputHeight, kStridedInputWidth)
16315 .adjustment_height(1)
16316 .kernel_size(2, 2)
16317 .stride(2)
16318 .groups(2)
16319 .group_input_channels(17)
16320 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16321 .iterations(1)
16322 .TestF32();
16323 }
16324
16325 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_width_adjustment) {
16326 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16327 DeconvolutionOperatorTester()
16328 .batch_size(2)
16329 .input_size(kStridedInputHeight, kStridedInputWidth)
16330 .adjustment_width(1)
16331 .kernel_size(2, 2)
16332 .stride(2)
16333 .groups(2)
16334 .group_input_channels(17)
16335 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16336 .iterations(1)
16337 .TestF32();
16338 }
16339
16340 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_input_height) {
16341 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16342 for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
16343 DeconvolutionOperatorTester()
16344 .batch_size(2)
16345 .input_size(input_height, kStridedInputWidth)
16346 .kernel_size(2, 2)
16347 .stride(2)
16348 .groups(2)
16349 .group_input_channels(17)
16350 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16351 .iterations(1)
16352 .TestF32();
16353 }
16354 }
16355
16356 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_input_width) {
16357 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16358 for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
16359 DeconvolutionOperatorTester()
16360 .batch_size(2)
16361 .input_size(kStridedInputHeight, input_width)
16362 .kernel_size(2, 2)
16363 .stride(2)
16364 .groups(2)
16365 .group_input_channels(17)
16366 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16367 .iterations(1)
16368 .TestF32();
16369 }
16370 }
16371
16372 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_input_channels) {
16373 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16374 for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
16375 DeconvolutionOperatorTester()
16376 .batch_size(2)
16377 .input_size(kStridedInputHeight, kStridedInputWidth)
16378 .kernel_size(2, 2)
16379 .stride(2)
16380 .groups(2)
16381 .group_input_channels(input_channels)
16382 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16383 .iterations(1)
16384 .TestF32();
16385 }
16386 }
16387
16388 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_output_channels) {
16389 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16390 for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
16391 DeconvolutionOperatorTester()
16392 .batch_size(2)
16393 .input_size(kStridedInputHeight, kStridedInputWidth)
16394 .kernel_size(2, 2)
16395 .stride(2)
16396 .groups(2)
16397 .group_input_channels(17)
16398 .group_output_channels(output_channels)
16399 .iterations(1)
16400 .TestF32();
16401 }
16402 }
16403
16404 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_input_stride) {
16405 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16406 DeconvolutionOperatorTester()
16407 .batch_size(2)
16408 .input_size(kStridedInputHeight, kStridedInputWidth)
16409 .kernel_size(2, 2)
16410 .stride(2)
16411 .groups(2)
16412 .group_input_channels(17)
16413 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16414 .input_pixel_stride(37)
16415 .iterations(3)
16416 .TestF32();
16417 }
16418
16419 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_output_stride) {
16420 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16421 DeconvolutionOperatorTester()
16422 .batch_size(2)
16423 .input_size(kStridedInputHeight, kStridedInputWidth)
16424 .kernel_size(2, 2)
16425 .stride(2)
16426 .groups(2)
16427 .group_input_channels(17)
16428 .group_output_channels(xnn_params.f32.gemm.nr + 3)
16429 .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
16430 .iterations(3)
16431 .TestF32();
16432 }
16433
16434 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_qmin) {
16435 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16436 DeconvolutionOperatorTester()
16437 .batch_size(2)
16438 .input_size(kStridedInputHeight, kStridedInputWidth)
16439 .kernel_size(2, 2)
16440 .stride(2)
16441 .groups(2)
16442 .group_input_channels(17)
16443 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16444 .qmin(128)
16445 .iterations(3)
16446 .TestF32();
16447 }
16448
16449 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_qmax) {
16450 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16451 DeconvolutionOperatorTester()
16452 .batch_size(2)
16453 .input_size(kStridedInputHeight, kStridedInputWidth)
16454 .kernel_size(2, 2)
16455 .stride(2)
16456 .groups(2)
16457 .group_input_channels(17)
16458 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16459 .qmax(128)
16460 .iterations(3)
16461 .TestF32();
16462 }
16463
16464 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_without_bias) {
16465 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16466 DeconvolutionOperatorTester()
16467 .has_bias(false)
16468 .batch_size(2)
16469 .input_size(kStridedInputHeight, kStridedInputWidth)
16470 .kernel_size(2, 2)
16471 .stride(2)
16472 .groups(2)
16473 .group_input_channels(17)
16474 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16475 .iterations(3)
16476 .TestF32();
16477 }
16478
16479 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_grouped_2x2s2) {
16480 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16481 DeconvolutionOperatorTester()
16482 .batch_size(2)
16483 .input_size(kStridedInputHeight, kStridedInputWidth)
16484 .kernel_size(2, 2)
16485 .stride(2)
16486 .groups(2)
16487 .group_input_channels(17)
16488 .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16489 .use_weights_cache(true)
16490 .iterations(3)
16491 .TestF32();
16492 }
16493
16494 /**************************** SUBCONV2D/GEMM path, setup ****************************/
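// Same re-setup checks as the 3x3s2 setup tests above, applied to the kernel == stride (2x2s2)
// configuration.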
16495
16496 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_setup_changing_batch) {
16497 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16498 DeconvolutionOperatorTester()
16499 .batch_size(2)
16500 .next_batch_size(5)
16501 .input_size(kStridedInputHeight, kStridedInputWidth)
16502 .kernel_size(2, 2)
16503 .stride(2)
16504 .groups(2)
16505 .group_input_channels(15)
16506 .group_output_channels(17)
16507 .TestSetupF32();
16508 }
16509
16510 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_setup_changing_height) {
16511 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16512 DeconvolutionOperatorTester()
16513 .batch_size(2)
16514 .input_size(kStridedInputHeight, kStridedInputWidth)
16515 .next_input_height(kStridedInputHeight + 3)
16516 .kernel_size(2, 2)
16517 .stride(2)
16518 .groups(2)
16519 .group_input_channels(15)
16520 .group_output_channels(17)
16521 .TestSetupF32();
16522 }
16523
16524 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_setup_changing_width) {
16525 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16526 DeconvolutionOperatorTester()
16527 .batch_size(2)
16528 .input_size(kStridedInputHeight, kStridedInputWidth)
16529 .next_input_width(kStridedInputWidth + 3)
16530 .kernel_size(2, 2)
16531 .stride(2)
16532 .groups(2)
16533 .group_input_channels(15)
16534 .group_output_channels(17)
16535 .TestSetupF32();
16536 }
16537