// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <gtest/gtest.h>

#include <xnnpack/params.h>

#include "deconvolution-operator-tester.h"

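// Deconvolution operator tests for the QS8 (signed 8-bit quantized) datatype in
// NHWC layout. Each test case initializes XNNPACK, configures a
// DeconvolutionOperatorTester with the parameters under test (kernel size,
// padding, stride, dilation, output adjustment, groups, batch size, pixel
// strides, and channel counts), and calls TestQS8() to run the operator.
// Output channel counts are expressed in terms of the QS8 GEMM tile width
// (xnn_params.qs8.gemm.nr), apparently so that both full and partial output
// tiles are exercised.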

constexpr size_t kUnstridedInputHeight = 8;
constexpr size_t kUnstridedInputWidth = 7;
constexpr size_t kStridedInputHeight = 6;
constexpr size_t kStridedInputWidth = 5;


/**************************** Future GEMM path ****************************/

TEST(DECONVOLUTION_NHWC_QS8, 1x1) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
    DeconvolutionOperatorTester()
      .input_size(input_height, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, input_width)
      .kernel_size(1, 1)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_input_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .group_input_channels(input_channels)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .group_input_channels(23)
      .group_output_channels(output_channels)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_input_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .input_pixel_stride(28)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_output_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_qmin) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmin(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_with_qmax) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmax(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 1x1_without_bias) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .has_bias(false)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

/**************************** Future GEMM path, grouped ****************************/

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
    DeconvolutionOperatorTester()
      .input_size(input_height, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, input_width)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_input_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(input_channels)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(output_channels)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_input_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .input_pixel_stride(47)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_output_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr + 3)
    .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_qmin) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmin(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_with_qmax) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmax(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_1x1_without_bias) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .has_bias(false)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

/**************************** Future GEMM path, batched ****************************/

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(input_height, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, input_width)
      .kernel_size(1, 1)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_input_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .group_input_channels(input_channels)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .group_input_channels(23)
      .group_output_channels(output_channels)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_input_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .input_pixel_stride(28)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_output_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_qmin) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmin(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_with_qmax) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmax(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_1x1_without_bias) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

/**************************** Future GEMM path, batched, grouped ****************************/

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(input_height, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, input_width)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_input_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(input_channels)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .kernel_size(1, 1)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(output_channels)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_input_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .input_pixel_stride(47)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_output_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr + 3)
    .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_qmin) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmin(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_with_qmax) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmax(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_1x1_without_bias) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .kernel_size(1, 1)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

/**************************** CONV path ****************************/

TEST(DECONVOLUTION_NHWC_QS8, 3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(15)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, Kx3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding_width(1)
      .kernel_size(kernel_height, 3)
      .group_input_channels(17)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3xK) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding_height(1)
      .kernel_size(3, kernel_width)
      .group_input_channels(17)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_height_padding) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
    for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
      DeconvolutionOperatorTester()
        .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
        .padding_width(1)
        .padding_top(padding_top)
        .padding_bottom(padding_bottom)
        .kernel_size(3, 3)
        .group_input_channels(15)
        .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
        .iterations(1)
        .TestQS8();
    }
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_width_padding) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
    for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
      DeconvolutionOperatorTester()
        .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
        .padding_height(1)
        .padding_left(padding_left)
        .padding_right(padding_right)
        .kernel_size(3, 3)
        .group_input_channels(15)
        .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
        .iterations(1)
        .TestQS8();
    }
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_height_adjustment) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .stride_height(adjustment_height + 1)
      .adjustment_height(adjustment_height)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_width_adjustment) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .stride_width(adjustment_width + 1)
      .adjustment_width(adjustment_width)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
    DeconvolutionOperatorTester()
      .input_size(input_height, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, input_width)
      .padding(1)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_input_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .group_input_channels(input_channels)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .group_input_channels(23)
      .group_output_channels(output_channels)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_height_dilation) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .dilation_height(dilation_height)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_width_dilation) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .dilation_width(dilation_width)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_height_dilation_and_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .dilation_height(3)
    .stride_height(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_width_dilation_and_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .dilation_width(3)
    .stride_width(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_input_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .input_pixel_stride(28)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_output_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_qmin) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmin(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_with_qmax) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmax(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, 3x3_without_bias) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .has_bias(false)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, weights_cache_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(15)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .use_weights_cache(true)
    .iterations(3)
    .TestQS8();
}

/**************************** CONV path, grouped ****************************/

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .groups(2)
    .group_input_channels(15)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_Kx3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding_width(1)
      .kernel_size(kernel_height, 3)
      .groups(2)
      .group_input_channels(17)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3xK) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding_height(1)
      .kernel_size(3, kernel_width)
      .groups(2)
      .group_input_channels(17)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_height_padding) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
    for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
      DeconvolutionOperatorTester()
        .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
        .padding_width(1)
        .padding_top(padding_top)
        .padding_bottom(padding_bottom)
        .kernel_size(3, 3)
        .groups(2)
        .group_input_channels(15)
        .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
        .iterations(1)
        .TestQS8();
    }
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_width_padding) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
    for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
      DeconvolutionOperatorTester()
        .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
        .padding_height(1)
        .padding_left(padding_left)
        .padding_right(padding_right)
        .kernel_size(3, 3)
        .groups(2)
        .group_input_channels(15)
        .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
        .iterations(1)
        .TestQS8();
    }
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_height_adjustment) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .stride_height(adjustment_height + 1)
      .adjustment_height(adjustment_height)
      .kernel_size(3, 3)
      .groups(2)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_width_adjustment) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .stride_width(adjustment_width + 1)
      .adjustment_width(adjustment_width)
      .kernel_size(3, 3)
      .groups(2)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
    DeconvolutionOperatorTester()
      .input_size(input_height, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .groups(2)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, input_width)
      .padding(1)
      .kernel_size(3, 3)
      .groups(2)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_input_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .groups(2)
      .group_input_channels(input_channels)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(output_channels)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_height_dilation) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .dilation_height(dilation_height)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_width_dilation) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
    DeconvolutionOperatorTester()
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .dilation_width(dilation_width)
      .groups(2)
      .group_input_channels(23)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_height_dilation_and_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .dilation_height(3)
    .stride_height(2)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_width_dilation_and_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .dilation_width(3)
    .stride_width(2)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_input_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .input_pixel_stride(47)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_output_stride) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr + 3)
    .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_qmin) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmin(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_with_qmax) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .qmax(128)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3_without_bias) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .has_bias(false)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .groups(2)
    .group_input_channels(23)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, weights_cache_grouped_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .groups(2)
    .group_input_channels(15)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .use_weights_cache(true)
    .iterations(3)
    .TestQS8();
}

/**************************** CONV path, batched ****************************/

TEST(DECONVOLUTION_NHWC_QS8, batched_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  DeconvolutionOperatorTester()
    .batch_size(2)
    .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
    .padding(1)
    .kernel_size(3, 3)
    .group_input_channels(15)
    .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
    .iterations(3)
    .TestQS8();
}

TEST(DECONVOLUTION_NHWC_QS8, batched_Kx3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding_width(1)
      .kernel_size(kernel_height, 3)
      .group_input_channels(17)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_3xK) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding_height(1)
      .kernel_size(3, kernel_width)
      .group_input_channels(17)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(3)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_height_padding) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
    for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
      DeconvolutionOperatorTester()
        .batch_size(2)
        .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
        .padding_width(1)
        .padding_top(padding_top)
        .padding_bottom(padding_bottom)
        .kernel_size(3, 3)
        .group_input_channels(15)
        .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
        .iterations(1)
        .TestQS8();
    }
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_width_padding) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
    for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
      DeconvolutionOperatorTester()
        .batch_size(2)
        .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
        .padding_height(1)
        .padding_left(padding_left)
        .padding_right(padding_right)
        .kernel_size(3, 3)
        .group_input_channels(15)
        .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
        .iterations(1)
        .TestQS8();
    }
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_height_adjustment) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .stride_height(adjustment_height + 1)
      .adjustment_height(adjustment_height)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_width_adjustment) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
      .padding(1)
      .stride_width(adjustment_width + 1)
      .adjustment_width(adjustment_width)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(input_height, kUnstridedInputWidth)
      .padding(1)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
    DeconvolutionOperatorTester()
      .batch_size(2)
      .input_size(kUnstridedInputHeight, input_width)
      .padding(1)
      .kernel_size(3, 3)
      .group_input_channels(15)
      .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
      .iterations(1)
      .TestQS8();
  }
}

TEST(DECONVOLUTION_NHWC_QS8,batched_3x3_varying_input_channels)1331 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_input_channels) {
1332   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1333   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
1334     DeconvolutionOperatorTester()
1335       .batch_size(2)
1336       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1337       .padding(1)
1338       .kernel_size(3, 3)
1339       .group_input_channels(input_channels)
1340       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1341       .iterations(1)
1342       .TestQS8();
1343   }
1344 }
1345 
TEST(DECONVOLUTION_NHWC_QS8,batched_3x3_varying_output_channels)1346 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_varying_output_channels) {
1347   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1348   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
1349     DeconvolutionOperatorTester()
1350       .batch_size(2)
1351       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1352       .padding(1)
1353       .kernel_size(3, 3)
1354       .group_input_channels(23)
1355       .group_output_channels(output_channels)
1356       .iterations(1)
1357       .TestQS8();
1358   }
1359 }
1360 
TEST(DECONVOLUTION_NHWC_QS8,batched_3x3_with_height_dilation)1361 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_height_dilation) {
1362   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1363   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
1364     DeconvolutionOperatorTester()
1365       .batch_size(2)
1366       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1367       .padding(1)
1368       .kernel_size(3, 3)
1369       .dilation_height(dilation_height)
1370       .group_input_channels(23)
1371       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1372       .iterations(3)
1373       .TestQS8();
1374   }
1375 }
1376 
TEST(DECONVOLUTION_NHWC_QS8,batched_3x3_with_width_dilation)1377 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_width_dilation) {
1378   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1379   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
1380     DeconvolutionOperatorTester()
1381       .batch_size(2)
1382       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1383       .padding(1)
1384       .kernel_size(3, 3)
1385       .dilation_width(dilation_width)
1386       .group_input_channels(23)
1387       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1388       .iterations(3)
1389       .TestQS8();
1390   }
1391 }
1392 
1393 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_height_dilation_and_stride) {
1394   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1395   DeconvolutionOperatorTester()
1396     .batch_size(2)
1397     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1398     .padding(1)
1399     .kernel_size(3, 3)
1400     .dilation_height(3)
1401     .stride_height(2)
1402     .group_input_channels(23)
1403     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1404     .iterations(3)
1405     .TestQS8();
1406 }
1407 
1408 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_width_dilation_and_stride) {
1409   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1410   DeconvolutionOperatorTester()
1411     .batch_size(2)
1412     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1413     .padding(1)
1414     .kernel_size(3, 3)
1415     .dilation_width(3)
1416     .stride_width(2)
1417     .group_input_channels(23)
1418     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1419     .iterations(3)
1420     .TestQS8();
1421 }
1422 
1423 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_input_stride) {
1424   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1425   DeconvolutionOperatorTester()
1426     .batch_size(2)
1427     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1428     .padding(1)
1429     .kernel_size(3, 3)
1430     .group_input_channels(23)
1431     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1432     .input_pixel_stride(28)
1433     .iterations(3)
1434     .TestQS8();
1435 }
1436 
1437 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_output_stride) {
1438   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1439   DeconvolutionOperatorTester()
1440     .batch_size(2)
1441     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1442     .padding(1)
1443     .kernel_size(3, 3)
1444     .group_input_channels(23)
1445     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1446     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
1447     .iterations(3)
1448     .TestQS8();
1449 }
1450 
1451 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_qmin) {
1452   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1453   DeconvolutionOperatorTester()
1454     .batch_size(2)
1455     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1456     .padding(1)
1457     .kernel_size(3, 3)
1458     .group_input_channels(23)
1459     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1460     .qmin(128)
1461     .iterations(3)
1462     .TestQS8();
1463 }
1464 
1465 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_with_qmax) {
1466   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1467   DeconvolutionOperatorTester()
1468     .batch_size(2)
1469     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1470     .padding(1)
1471     .kernel_size(3, 3)
1472     .group_input_channels(23)
1473     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1474     .qmax(128)
1475     .iterations(3)
1476     .TestQS8();
1477 }
1478 
1479 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3_without_bias) {
1480   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1481   DeconvolutionOperatorTester()
1482     .has_bias(false)
1483     .batch_size(2)
1484     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1485     .padding(1)
1486     .kernel_size(3, 3)
1487     .group_input_channels(23)
1488     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1489     .iterations(3)
1490     .TestQS8();
1491 }
1492 
1493 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_3x3) {
1494   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1495   DeconvolutionOperatorTester()
1496     .batch_size(2)
1497     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1498     .padding(1)
1499     .kernel_size(3, 3)
1500     .group_input_channels(15)
1501     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1502     .use_weights_cache(true)
1503     .iterations(3)
1504     .TestQS8();
1505 }
1506 
1507 /**************************** CONV path, grouped, batched ****************************/
1508 
1509 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3) {
1510   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1511   DeconvolutionOperatorTester()
1512     .batch_size(2)
1513     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1514     .padding(1)
1515     .kernel_size(3, 3)
1516     .groups(2)
1517     .group_input_channels(15)
1518     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1519     .iterations(3)
1520     .TestQS8();
1521 }
1522 
1523 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_Kx3) {
1524   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1525   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
1526     DeconvolutionOperatorTester()
1527       .batch_size(2)
1528       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1529       .padding_width(1)
1530       .kernel_size(kernel_height, 3)
1531       .groups(2)
1532       .group_input_channels(17)
1533       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1534       .iterations(3)
1535       .TestQS8();
1536   }
1537 }
1538 
1539 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3xK) {
1540   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1541   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
1542     DeconvolutionOperatorTester()
1543       .batch_size(2)
1544       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1545       .padding_height(1)
1546       .kernel_size(3, kernel_width)
1547       .groups(2)
1548       .group_input_channels(17)
1549       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1550       .iterations(3)
1551       .TestQS8();
1552   }
1553 }
1554 
1555 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_height_padding) {
1556   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1557   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
1558     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
1559       DeconvolutionOperatorTester()
1560         .batch_size(2)
1561         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1562         .padding_width(1)
1563         .padding_top(padding_top)
1564         .padding_bottom(padding_bottom)
1565         .kernel_size(3, 3)
1566         .groups(2)
1567         .group_input_channels(15)
1568         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1569         .iterations(1)
1570         .TestQS8();
1571     }
1572   }
1573 }
1574 
1575 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_width_padding) {
1576   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1577   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
1578     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
1579       DeconvolutionOperatorTester()
1580         .batch_size(2)
1581         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1582         .padding_height(1)
1583         .padding_left(padding_left)
1584         .padding_right(padding_right)
1585         .kernel_size(3, 3)
1586         .groups(2)
1587         .group_input_channels(15)
1588         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1589         .iterations(1)
1590         .TestQS8();
1591     }
1592   }
1593 }
1594 
1595 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_height_adjustment) {
1596   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1597   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
1598     DeconvolutionOperatorTester()
1599       .batch_size(2)
1600       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1601       .padding(1)
1602       .stride_height(adjustment_height + 1)
1603       .adjustment_height(adjustment_height)
1604       .kernel_size(3, 3)
1605       .groups(2)
1606       .group_input_channels(15)
1607       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1608       .iterations(1)
1609       .TestQS8();
1610   }
1611 }
1612 
1613 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_width_adjustment) {
1614   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1615   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
1616     DeconvolutionOperatorTester()
1617       .batch_size(2)
1618       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1619       .padding(1)
1620       .stride_width(adjustment_width + 1)
1621       .adjustment_width(adjustment_width)
1622       .kernel_size(3, 3)
1623       .groups(2)
1624       .group_input_channels(15)
1625       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1626       .iterations(1)
1627       .TestQS8();
1628   }
1629 }
1630 
1631 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_input_height) {
1632   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1633   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
1634     DeconvolutionOperatorTester()
1635       .batch_size(2)
1636       .input_size(input_height, kUnstridedInputWidth)
1637       .padding(1)
1638       .kernel_size(3, 3)
1639       .groups(2)
1640       .group_input_channels(15)
1641       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1642       .iterations(1)
1643       .TestQS8();
1644   }
1645 }
1646 
1647 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_input_width) {
1648   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1649   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
1650     DeconvolutionOperatorTester()
1651       .batch_size(2)
1652       .input_size(kUnstridedInputHeight, input_width)
1653       .padding(1)
1654       .kernel_size(3, 3)
1655       .groups(2)
1656       .group_input_channels(15)
1657       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1658       .iterations(1)
1659       .TestQS8();
1660   }
1661 }
1662 
1663 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_input_channels) {
1664   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1665   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
1666     DeconvolutionOperatorTester()
1667       .batch_size(2)
1668       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1669       .padding(1)
1670       .kernel_size(3, 3)
1671       .groups(2)
1672       .group_input_channels(input_channels)
1673       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1674       .iterations(1)
1675       .TestQS8();
1676   }
1677 }
1678 
1679 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_varying_output_channels) {
1680   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1681   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
1682     DeconvolutionOperatorTester()
1683       .batch_size(2)
1684       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1685       .padding(1)
1686       .kernel_size(3, 3)
1687       .groups(2)
1688       .group_input_channels(23)
1689       .group_output_channels(output_channels)
1690       .iterations(1)
1691       .TestQS8();
1692   }
1693 }
1694 
1695 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_height_dilation) {
1696   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1697   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
1698     DeconvolutionOperatorTester()
1699       .batch_size(2)
1700       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1701       .padding(1)
1702       .kernel_size(3, 3)
1703       .dilation_height(dilation_height)
1704       .groups(2)
1705       .group_input_channels(23)
1706       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1707       .iterations(3)
1708       .TestQS8();
1709   }
1710 }
1711 
1712 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_width_dilation) {
1713   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1714   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
1715     DeconvolutionOperatorTester()
1716       .batch_size(2)
1717       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1718       .padding(1)
1719       .kernel_size(3, 3)
1720       .dilation_width(dilation_width)
1721       .groups(2)
1722       .group_input_channels(23)
1723       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1724       .iterations(3)
1725       .TestQS8();
1726   }
1727 }
1728 
1729 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_height_dilation_and_stride) {
1730   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1731   DeconvolutionOperatorTester()
1732     .batch_size(2)
1733     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1734     .padding(1)
1735     .kernel_size(3, 3)
1736     .dilation_height(3)
1737     .stride_height(2)
1738     .groups(2)
1739     .group_input_channels(23)
1740     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1741     .iterations(3)
1742     .TestQS8();
1743 }
1744 
1745 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_width_dilation_and_stride) {
1746   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1747   DeconvolutionOperatorTester()
1748     .batch_size(2)
1749     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1750     .padding(1)
1751     .kernel_size(3, 3)
1752     .dilation_width(3)
1753     .stride_width(2)
1754     .groups(2)
1755     .group_input_channels(23)
1756     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1757     .iterations(3)
1758     .TestQS8();
1759 }
1760 
1761 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_input_stride) {
1762   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1763   DeconvolutionOperatorTester()
1764     .batch_size(2)
1765     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1766     .padding(1)
1767     .kernel_size(3, 3)
1768     .groups(2)
1769     .group_input_channels(23)
1770     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1771     .input_pixel_stride(47)
1772     .iterations(3)
1773     .TestQS8();
1774 }
1775 
1776 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_output_stride) {
1777   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1778   DeconvolutionOperatorTester()
1779     .batch_size(2)
1780     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1781     .padding(1)
1782     .kernel_size(3, 3)
1783     .groups(2)
1784     .group_input_channels(23)
1785     .group_output_channels(xnn_params.qs8.gemm.nr + 3)
1786     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
1787     .iterations(3)
1788     .TestQS8();
1789 }
1790 
1791 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_qmin) {
1792   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1793   DeconvolutionOperatorTester()
1794     .batch_size(2)
1795     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1796     .padding(1)
1797     .kernel_size(3, 3)
1798     .groups(2)
1799     .group_input_channels(23)
1800     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1801     .qmin(128)
1802     .iterations(3)
1803     .TestQS8();
1804 }
1805 
1806 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_with_qmax) {
1807   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1808   DeconvolutionOperatorTester()
1809     .batch_size(2)
1810     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1811     .padding(1)
1812     .kernel_size(3, 3)
1813     .groups(2)
1814     .group_input_channels(23)
1815     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1816     .qmax(128)
1817     .iterations(3)
1818     .TestQS8();
1819 }
1820 
1821 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3_without_bias) {
1822   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1823   DeconvolutionOperatorTester()
1824     .has_bias(false)
1825     .batch_size(2)
1826     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1827     .padding(1)
1828     .kernel_size(3, 3)
1829     .groups(2)
1830     .group_input_channels(23)
1831     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1832     .iterations(3)
1833     .TestQS8();
1834 }
1835 
1836 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_grouped_3x3) {
1837   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1838   DeconvolutionOperatorTester()
1839     .batch_size(2)
1840     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1841     .padding(1)
1842     .kernel_size(3, 3)
1843     .groups(2)
1844     .group_input_channels(15)
1845     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1846     .use_weights_cache(true)
1847     .iterations(3)
1848     .TestQS8();
1849 }
1850 
1851 /**************************** CONV path, setup ****************************/
1852 
1853 TEST(DECONVOLUTION_NHWC_QS8, 3x3_setup_changing_batch) {
1854   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1855   DeconvolutionOperatorTester()
1856     .batch_size(2)
1857     .next_batch_size(5)
1858     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1859     .kernel_height(3)
1860     .kernel_width(5)
1861     .groups(2)
1862     .group_input_channels(15)
1863     .group_output_channels(17)
1864     .TestSetupQS8();
1865 }
1866 
1867 TEST(DECONVOLUTION_NHWC_QS8, 3x3_setup_changing_height) {
1868   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1869   DeconvolutionOperatorTester()
1870     .batch_size(2)
1871     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1872     .next_input_height(kUnstridedInputHeight + 3)
1873     .kernel_height(3)
1874     .kernel_width(5)
1875     .groups(2)
1876     .group_input_channels(15)
1877     .group_output_channels(17)
1878     .TestSetupQS8();
1879 }
1880 
1881 TEST(DECONVOLUTION_NHWC_QS8, 3x3_setup_changing_width) {
1882   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1883   DeconvolutionOperatorTester()
1884     .batch_size(2)
1885     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
1886     .next_input_width(kUnstridedInputWidth + 3)
1887     .kernel_height(3)
1888     .kernel_width(5)
1889     .groups(2)
1890     .group_input_channels(15)
1891     .group_output_channels(17)
1892     .TestSetupQS8();
1893 }
1894 
1895 /**************************** SUBCONV2D/IGEMM path ****************************/
1896 
1897 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2) {
1898   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1899   DeconvolutionOperatorTester()
1900     .input_size(kStridedInputHeight, kStridedInputWidth)
1901     .padding(1)
1902     .kernel_size(3, 3)
1903     .stride(2)
1904     .group_input_channels(15)
1905     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1906     .iterations(3)
1907     .TestQS8();
1908 }
1909 
1910 TEST(DECONVOLUTION_NHWC_QS8, Kx3s2) {
1911   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1912   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
1913     DeconvolutionOperatorTester()
1914       .input_size(kStridedInputHeight, kStridedInputWidth)
1915       .padding_width(1)
1916       .kernel_size(kernel_height, 3)
1917       .stride(2)
1918       .group_input_channels(17)
1919       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1920       .iterations(3)
1921       .TestQS8();
1922   }
1923 }
1924 
1925 TEST(DECONVOLUTION_NHWC_QS8, 3xKs2) {
1926   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1927   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
1928     DeconvolutionOperatorTester()
1929       .input_size(kStridedInputHeight, kStridedInputWidth)
1930       .padding_height(1)
1931       .kernel_size(3, kernel_width)
1932       .stride(2)
1933       .group_input_channels(17)
1934       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1935       .iterations(3)
1936       .TestQS8();
1937   }
1938 }
1939 
1940 TEST(DECONVOLUTION_NHWC_QS8, 3x3sSx1) {
1941   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1942   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
1943     DeconvolutionOperatorTester()
1944       .input_size(kStridedInputHeight, kStridedInputWidth)
1945       .padding(1)
1946       .padding_width(1)
1947       .kernel_size(3, 3)
1948       .stride_height(stride_height)
1949       .group_input_channels(17)
1950       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1951       .iterations(3)
1952       .TestQS8();
1953   }
1954 }
1955 
1956 TEST(DECONVOLUTION_NHWC_QS8, 3x3s1xS) {
1957   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1958   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
1959     DeconvolutionOperatorTester()
1960       .input_size(kStridedInputHeight, kStridedInputWidth)
1961       .padding(1)
1962       .padding_width(1)
1963       .kernel_size(3, 3)
1964       .stride_width(stride_width)
1965       .group_input_channels(17)
1966       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1967       .iterations(3)
1968       .TestQS8();
1969   }
1970 }
1971 
1972 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_height_padding) {
1973   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1974   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
1975     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
1976       DeconvolutionOperatorTester()
1977         .input_size(kStridedInputHeight, kStridedInputWidth)
1978         .padding_width(1)
1979         .padding_top(padding_top)
1980         .padding_bottom(padding_bottom)
1981         .kernel_size(3, 3)
1982         .stride(2)
1983         .group_input_channels(15)
1984         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
1985         .iterations(1)
1986         .TestQS8();
1987     }
1988   }
1989 }
1990 
1991 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_width_padding) {
1992   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1993   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
1994     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
1995       DeconvolutionOperatorTester()
1996         .input_size(kStridedInputHeight, kStridedInputWidth)
1997         .padding_height(1)
1998         .padding_left(padding_left)
1999         .padding_right(padding_right)
2000         .kernel_size(3, 3)
2001         .stride(2)
2002         .group_input_channels(15)
2003         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2004         .iterations(1)
2005         .TestQS8();
2006     }
2007   }
2008 }
2009 
2010 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_height_adjustment) {
2011   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2012   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2013     DeconvolutionOperatorTester()
2014       .input_size(kStridedInputHeight, kStridedInputWidth)
2015       .padding(1)
2016       .adjustment_height(adjustment_height)
2017       .kernel_size(3, 3)
2018       .stride(2)
2019       .group_input_channels(15)
2020       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2021       .iterations(1)
2022       .TestQS8();
2023   }
2024 }
2025 
2026 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_width_adjustment) {
2027   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2028   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2029     DeconvolutionOperatorTester()
2030       .input_size(kStridedInputHeight, kStridedInputWidth)
2031       .padding(1)
2032       .adjustment_width(adjustment_width)
2033       .kernel_size(3, 3)
2034       .stride(2)
2035       .group_input_channels(15)
2036       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2037       .iterations(1)
2038       .TestQS8();
2039   }
2040 }
2041 
2042 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_input_height) {
2043   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2044   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2045     DeconvolutionOperatorTester()
2046       .input_size(input_height, kStridedInputWidth)
2047       .padding(1)
2048       .kernel_size(3, 3)
2049       .stride(2)
2050       .group_input_channels(15)
2051       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2052       .iterations(1)
2053       .TestQS8();
2054   }
2055 }
2056 
2057 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_input_width) {
2058   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2059   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2060     DeconvolutionOperatorTester()
2061       .input_size(kStridedInputHeight, input_width)
2062       .padding(1)
2063       .kernel_size(3, 3)
2064       .stride(2)
2065       .group_input_channels(15)
2066       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2067       .iterations(1)
2068       .TestQS8();
2069   }
2070 }
2071 
2072 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_input_channels) {
2073   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2074   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
2075     DeconvolutionOperatorTester()
2076       .input_size(kStridedInputHeight, kStridedInputWidth)
2077       .padding(1)
2078       .kernel_size(3, 3)
2079       .stride(2)
2080       .group_input_channels(input_channels)
2081       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2082       .iterations(1)
2083       .TestQS8();
2084   }
2085 }
2086 
2087 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_varying_output_channels) {
2088   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2089   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
2090     DeconvolutionOperatorTester()
2091       .input_size(kStridedInputHeight, kStridedInputWidth)
2092       .padding(1)
2093       .kernel_size(3, 3)
2094       .stride(2)
2095       .group_input_channels(23)
2096       .group_output_channels(output_channels)
2097       .iterations(1)
2098       .TestQS8();
2099   }
2100 }
2101 
2102 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_input_stride) {
2103   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2104   DeconvolutionOperatorTester()
2105     .input_size(kStridedInputHeight, kStridedInputWidth)
2106     .padding(1)
2107     .kernel_size(3, 3)
2108     .stride(2)
2109     .group_input_channels(23)
2110     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2111     .input_pixel_stride(28)
2112     .iterations(3)
2113     .TestQS8();
2114 }
2115 
2116 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_output_stride) {
2117   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2118   DeconvolutionOperatorTester()
2119     .input_size(kStridedInputHeight, kStridedInputWidth)
2120     .padding(1)
2121     .kernel_size(3, 3)
2122     .stride(2)
2123     .group_input_channels(23)
2124     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2125     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
2126     .iterations(3)
2127     .TestQS8();
2128 }
2129 
2130 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_qmin) {
2131   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2132   DeconvolutionOperatorTester()
2133     .input_size(kStridedInputHeight, kStridedInputWidth)
2134     .padding(1)
2135     .kernel_size(3, 3)
2136     .stride(2)
2137     .group_input_channels(23)
2138     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2139     .qmin(128)
2140     .iterations(3)
2141     .TestQS8();
2142 }
2143 
2144 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_with_qmax) {
2145   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2146   DeconvolutionOperatorTester()
2147     .input_size(kStridedInputHeight, kStridedInputWidth)
2148     .padding(1)
2149     .kernel_size(3, 3)
2150     .stride(2)
2151     .group_input_channels(23)
2152     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2153     .qmax(128)
2154     .iterations(3)
2155     .TestQS8();
2156 }
2157 
2158 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_without_bias) {
2159   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2160   DeconvolutionOperatorTester()
2161     .has_bias(false)
2162     .input_size(kStridedInputHeight, kStridedInputWidth)
2163     .padding(1)
2164     .kernel_size(3, 3)
2165     .stride(2)
2166     .group_input_channels(23)
2167     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2168     .iterations(3)
2169     .TestQS8();
2170 }
2171 
2172 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_3x3s2) {
2173   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2174   DeconvolutionOperatorTester()
2175     .input_size(kStridedInputHeight, kStridedInputWidth)
2176     .padding(1)
2177     .kernel_size(3, 3)
2178     .stride(2)
2179     .group_input_channels(15)
2180     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2181     .use_weights_cache(true)
2182     .iterations(3)
2183     .TestQS8();
2184 }
2185 
2186 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
2187 
2188 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2) {
2189   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2190   DeconvolutionOperatorTester()
2191     .input_size(kStridedInputHeight, kStridedInputWidth)
2192     .padding(1)
2193     .kernel_size(3, 3)
2194     .stride(2)
2195     .groups(2)
2196     .group_input_channels(17)
2197     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2198     .iterations(3)
2199     .TestQS8();
2200 }
2201 
2202 TEST(DECONVOLUTION_NHWC_QS8, grouped_Kx3s2) {
2203   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2204   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
2205     DeconvolutionOperatorTester()
2206       .input_size(kStridedInputHeight, kStridedInputWidth)
2207       .padding_width(1)
2208       .kernel_size(kernel_height, 3)
2209       .stride(2)
2210       .groups(2)
2211       .group_input_channels(17)
2212       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2213       .iterations(3)
2214       .TestQS8();
2215   }
2216 }
2217 
2218 TEST(DECONVOLUTION_NHWC_QS8, grouped_3xKs2) {
2219   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2220   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
2221     DeconvolutionOperatorTester()
2222       .input_size(kStridedInputHeight, kStridedInputWidth)
2223       .padding_height(1)
2224       .kernel_size(3, kernel_width)
2225       .stride(2)
2226       .groups(2)
2227       .group_input_channels(17)
2228       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2229       .iterations(3)
2230       .TestQS8();
2231   }
2232 }
2233 
2234 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3sSx1) {
2235   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2236   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
2237     DeconvolutionOperatorTester()
2238       .input_size(kStridedInputHeight, kStridedInputWidth)
2239       .padding(1)
2240       .padding_width(1)
2241       .kernel_size(3, 3)
2242       .stride_height(stride_height)
2243       .groups(2)
2244       .group_input_channels(17)
2245       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2246       .iterations(3)
2247       .TestQS8();
2248   }
2249 }
2250 
2251 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s1xS) {
2252   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2253   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
2254     DeconvolutionOperatorTester()
2255       .input_size(kStridedInputHeight, kStridedInputWidth)
2256       .padding(1)
2257       .padding_width(1)
2258       .kernel_size(3, 3)
2259       .stride_width(stride_width)
2260       .groups(2)
2261       .group_input_channels(17)
2262       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2263       .iterations(3)
2264       .TestQS8();
2265   }
2266 }
2267 
2268 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_height_padding) {
2269   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2270   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
2271     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
2272       DeconvolutionOperatorTester()
2273         .input_size(kStridedInputHeight, kStridedInputWidth)
2274         .padding_width(1)
2275         .padding_top(padding_top)
2276         .padding_bottom(padding_bottom)
2277         .kernel_size(3, 3)
2278         .stride(2)
2279         .groups(2)
2280         .group_input_channels(17)
2281         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2282         .iterations(1)
2283         .TestQS8();
2284     }
2285   }
2286 }
2287 
2288 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_width_padding) {
2289   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2290   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
2291     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
2292       DeconvolutionOperatorTester()
2293         .input_size(kStridedInputHeight, kStridedInputWidth)
2294         .padding_height(1)
2295         .padding_left(padding_left)
2296         .padding_right(padding_right)
2297         .kernel_size(3, 3)
2298         .stride(2)
2299         .groups(2)
2300         .group_input_channels(17)
2301         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2302         .iterations(1)
2303         .TestQS8();
2304     }
2305   }
2306 }
2307 
2308 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_height_adjustment) {
2309   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2310   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2311     DeconvolutionOperatorTester()
2312       .input_size(kStridedInputHeight, kStridedInputWidth)
2313       .padding(1)
2314       .adjustment_height(adjustment_height)
2315       .kernel_size(3, 3)
2316       .stride(2)
2317       .groups(2)
2318       .group_input_channels(17)
2319       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2320       .iterations(1)
2321       .TestQS8();
2322   }
2323 }
2324 
2325 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_width_adjustment) {
2326   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2327   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2328     DeconvolutionOperatorTester()
2329       .input_size(kStridedInputHeight, kStridedInputWidth)
2330       .padding(1)
2331       .adjustment_width(adjustment_width)
2332       .kernel_size(3, 3)
2333       .stride(2)
2334       .groups(2)
2335       .group_input_channels(17)
2336       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2337       .iterations(1)
2338       .TestQS8();
2339   }
2340 }
2341 
2342 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_input_height) {
2343   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2344   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2345     DeconvolutionOperatorTester()
2346       .input_size(input_height, kStridedInputWidth)
2347       .padding(1)
2348       .kernel_size(3, 3)
2349       .stride(2)
2350       .groups(2)
2351       .group_input_channels(17)
2352       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2353       .iterations(1)
2354       .TestQS8();
2355   }
2356 }
2357 
2358 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_input_width) {
2359   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2360   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2361     DeconvolutionOperatorTester()
2362       .input_size(kStridedInputHeight, input_width)
2363       .padding(1)
2364       .kernel_size(3, 3)
2365       .stride(2)
2366       .groups(2)
2367       .group_input_channels(17)
2368       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2369       .iterations(1)
2370       .TestQS8();
2371   }
2372 }
2373 
2374 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_input_channels) {
2375   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2376   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
2377     DeconvolutionOperatorTester()
2378       .input_size(kStridedInputHeight, kStridedInputWidth)
2379       .padding(1)
2380       .kernel_size(3, 3)
2381       .stride(2)
2382       .groups(2)
2383       .group_input_channels(input_channels)
2384       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2385       .iterations(1)
2386       .TestQS8();
2387   }
2388 }
2389 
2390 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_varying_output_channels) {
2391   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2392   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
2393     DeconvolutionOperatorTester()
2394       .input_size(kStridedInputHeight, kStridedInputWidth)
2395       .padding(1)
2396       .kernel_size(3, 3)
2397       .stride(2)
2398       .groups(2)
2399       .group_input_channels(17)
2400       .group_output_channels(output_channels)
2401       .iterations(1)
2402       .TestQS8();
2403   }
2404 }
2405 
2406 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_input_stride) {
2407   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2408   DeconvolutionOperatorTester()
2409     .input_size(kStridedInputHeight, kStridedInputWidth)
2410     .padding(1)
2411     .kernel_size(3, 3)
2412     .stride(2)
2413     .groups(2)
2414     .group_input_channels(17)
2415     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2416     .input_pixel_stride(37)
2417     .iterations(3)
2418     .TestQS8();
2419 }
2420 
2421 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_output_stride) {
2422   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2423   DeconvolutionOperatorTester()
2424     .input_size(kStridedInputHeight, kStridedInputWidth)
2425     .padding(1)
2426     .kernel_size(3, 3)
2427     .stride(2)
2428     .groups(2)
2429     .group_input_channels(17)
2430     .group_output_channels(xnn_params.qs8.gemm.nr + 3)
2431     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
2432     .iterations(3)
2433     .TestQS8();
2434 }
2435 
2436 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_qmin) {
2437   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2438   DeconvolutionOperatorTester()
2439     .input_size(kStridedInputHeight, kStridedInputWidth)
2440     .padding(1)
2441     .kernel_size(3, 3)
2442     .stride(2)
2443     .groups(2)
2444     .group_input_channels(17)
2445     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2446     .qmin(128)
2447     .iterations(3)
2448     .TestQS8();
2449 }
2450 
2451 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_with_qmax) {
2452   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2453   DeconvolutionOperatorTester()
2454     .input_size(kStridedInputHeight, kStridedInputWidth)
2455     .padding(1)
2456     .kernel_size(3, 3)
2457     .stride(2)
2458     .groups(2)
2459     .group_input_channels(17)
2460     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2461     .qmax(128)
2462     .iterations(3)
2463     .TestQS8();
2464 }
2465 
2466 TEST(DECONVOLUTION_NHWC_QS8, grouped_3x3s2_without_bias) {
2467   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2468   DeconvolutionOperatorTester()
2469     .has_bias(false)
2470     .input_size(kStridedInputHeight, kStridedInputWidth)
2471     .padding(1)
2472     .kernel_size(3, 3)
2473     .stride(2)
2474     .groups(2)
2475     .group_input_channels(17)
2476     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2477     .iterations(3)
2478     .TestQS8();
2479 }
2480 
2481 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_grouped_3x3s2) {
2482   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2483   DeconvolutionOperatorTester()
2484     .input_size(kStridedInputHeight, kStridedInputWidth)
2485     .padding(1)
2486     .kernel_size(3, 3)
2487     .stride(2)
2488     .groups(2)
2489     .group_input_channels(17)
2490     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2491     .use_weights_cache(true)
2492     .iterations(3)
2493     .TestQS8();
2494 }
2495 
2496 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
2497 
2498 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2) {
2499   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2500   DeconvolutionOperatorTester()
2501     .batch_size(2)
2502     .input_size(kStridedInputHeight, kStridedInputWidth)
2503     .padding(1)
2504     .kernel_size(3, 3)
2505     .stride(2)
2506     .group_input_channels(15)
2507     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2508     .iterations(3)
2509     .TestQS8();
2510 }
2511 
2512 TEST(DECONVOLUTION_NHWC_QS8, batched_Kx3s2) {
2513   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2514   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
2515     DeconvolutionOperatorTester()
2516       .batch_size(2)
2517       .input_size(kStridedInputHeight, kStridedInputWidth)
2518       .padding_width(1)
2519       .kernel_size(kernel_height, 3)
2520       .stride(2)
2521       .group_input_channels(17)
2522       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2523       .iterations(3)
2524       .TestQS8();
2525   }
2526 }
2527 
2528 TEST(DECONVOLUTION_NHWC_QS8, batched_3xKs2) {
2529   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2530   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
2531     DeconvolutionOperatorTester()
2532       .batch_size(2)
2533       .input_size(kStridedInputHeight, kStridedInputWidth)
2534       .padding_height(1)
2535       .kernel_size(3, kernel_width)
2536       .stride(2)
2537       .group_input_channels(17)
2538       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2539       .iterations(3)
2540       .TestQS8();
2541   }
2542 }
2543 
2544 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3sSx1) {
2545   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2546   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
2547     DeconvolutionOperatorTester()
2548       .batch_size(2)
2549       .input_size(kStridedInputHeight, kStridedInputWidth)
2550       .padding(1)
2551       .padding_width(1)
2552       .kernel_size(3, 3)
2553       .stride_height(stride_height)
2554       .group_input_channels(17)
2555       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2556       .iterations(3)
2557       .TestQS8();
2558   }
2559 }
2560 
2561 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s1xS) {
2562   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2563   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
2564     DeconvolutionOperatorTester()
2565       .batch_size(2)
2566       .input_size(kStridedInputHeight, kStridedInputWidth)
2567       .padding(1)
2568       .padding_width(1)
2569       .kernel_size(3, 3)
2570       .stride_width(stride_width)
2571       .group_input_channels(17)
2572       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2573       .iterations(3)
2574       .TestQS8();
2575   }
2576 }
2577 
2578 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_height_padding) {
2579   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2580   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
2581     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
2582       DeconvolutionOperatorTester()
2583         .batch_size(2)
2584         .input_size(kStridedInputHeight, kStridedInputWidth)
2585         .padding_width(1)
2586         .padding_top(padding_top)
2587         .padding_bottom(padding_bottom)
2588         .kernel_size(3, 3)
2589         .stride(2)
2590         .group_input_channels(15)
2591         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2592         .iterations(1)
2593         .TestQS8();
2594     }
2595   }
2596 }
2597 
2598 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_width_padding) {
2599   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2600   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
2601     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
2602       DeconvolutionOperatorTester()
2603         .batch_size(2)
2604         .input_size(kStridedInputHeight, kStridedInputWidth)
2605         .padding_height(1)
2606         .padding_left(padding_left)
2607         .padding_right(padding_right)
2608         .kernel_size(3, 3)
2609         .stride(2)
2610         .group_input_channels(15)
2611         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2612         .iterations(1)
2613         .TestQS8();
2614     }
2615   }
2616 }
2617 
2618 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_height_adjustment) {
2619   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2620   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2621     DeconvolutionOperatorTester()
2622       .batch_size(2)
2623       .input_size(kStridedInputHeight, kStridedInputWidth)
2624       .padding(1)
2625       .adjustment_height(adjustment_height)
2626       .kernel_size(3, 3)
2627       .stride(2)
2628       .group_input_channels(15)
2629       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2630       .iterations(1)
2631       .TestQS8();
2632   }
2633 }
2634 
2635 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_width_adjustment) {
2636   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2637   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2638     DeconvolutionOperatorTester()
2639       .batch_size(2)
2640       .input_size(kStridedInputHeight, kStridedInputWidth)
2641       .padding(1)
2642       .adjustment_width(adjustment_width)
2643       .kernel_size(3, 3)
2644       .stride(2)
2645       .group_input_channels(15)
2646       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2647       .iterations(1)
2648       .TestQS8();
2649   }
2650 }
2651 
2652 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_input_height) {
2653   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2654   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2655     DeconvolutionOperatorTester()
2656       .batch_size(2)
2657       .input_size(input_height, kStridedInputWidth)
2658       .padding(1)
2659       .kernel_size(3, 3)
2660       .stride(2)
2661       .group_input_channels(15)
2662       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2663       .iterations(1)
2664       .TestQS8();
2665   }
2666 }
2667 
2668 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_input_width) {
2669   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2670   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2671     DeconvolutionOperatorTester()
2672       .batch_size(2)
2673       .input_size(kStridedInputHeight, input_width)
2674       .padding(1)
2675       .kernel_size(3, 3)
2676       .stride(2)
2677       .group_input_channels(15)
2678       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2679       .iterations(1)
2680       .TestQS8();
2681   }
2682 }
2683 
2684 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_input_channels) {
2685   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2686   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
2687     DeconvolutionOperatorTester()
2688       .batch_size(2)
2689       .input_size(kStridedInputHeight, kStridedInputWidth)
2690       .padding(1)
2691       .kernel_size(3, 3)
2692       .stride(2)
2693       .group_input_channels(input_channels)
2694       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2695       .iterations(1)
2696       .TestQS8();
2697   }
2698 }
2699 
2700 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_varying_output_channels) {
2701   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2702   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
2703     DeconvolutionOperatorTester()
2704       .batch_size(2)
2705       .input_size(kStridedInputHeight, kStridedInputWidth)
2706       .padding(1)
2707       .kernel_size(3, 3)
2708       .stride(2)
2709       .group_input_channels(23)
2710       .group_output_channels(output_channels)
2711       .iterations(1)
2712       .TestQS8();
2713   }
2714 }
2715 
2716 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_input_stride) {
2717   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2718   DeconvolutionOperatorTester()
2719     .batch_size(2)
2720     .input_size(kStridedInputHeight, kStridedInputWidth)
2721     .padding(1)
2722     .kernel_size(3, 3)
2723     .stride(2)
2724     .group_input_channels(23)
2725     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2726     .input_pixel_stride(28)
2727     .iterations(3)
2728     .TestQS8();
2729 }
2730 
2731 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_output_stride) {
2732   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2733   DeconvolutionOperatorTester()
2734     .batch_size(2)
2735     .input_size(kStridedInputHeight, kStridedInputWidth)
2736     .padding(1)
2737     .kernel_size(3, 3)
2738     .stride(2)
2739     .group_input_channels(23)
2740     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2741     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
2742     .iterations(3)
2743     .TestQS8();
2744 }
2745 
2746 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_qmin) {
2747   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2748   DeconvolutionOperatorTester()
2749     .batch_size(2)
2750     .input_size(kStridedInputHeight, kStridedInputWidth)
2751     .padding(1)
2752     .kernel_size(3, 3)
2753     .stride(2)
2754     .group_input_channels(23)
2755     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2756     .qmin(128)
2757     .iterations(3)
2758     .TestQS8();
2759 }
2760 
2761 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_with_qmax) {
2762   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2763   DeconvolutionOperatorTester()
2764     .batch_size(2)
2765     .input_size(kStridedInputHeight, kStridedInputWidth)
2766     .padding(1)
2767     .kernel_size(3, 3)
2768     .stride(2)
2769     .group_input_channels(23)
2770     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2771     .qmax(128)
2772     .iterations(3)
2773     .TestQS8();
2774 }
2775 
2776 TEST(DECONVOLUTION_NHWC_QS8, batched_3x3s2_without_bias) {
2777   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2778   DeconvolutionOperatorTester()
2779     .has_bias(false)
2780     .batch_size(2)
2781     .input_size(kStridedInputHeight, kStridedInputWidth)
2782     .padding(1)
2783     .kernel_size(3, 3)
2784     .stride(2)
2785     .group_input_channels(23)
2786     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2787     .iterations(3)
2788     .TestQS8();
2789 }
2790 
2791 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_3x3s2) {
2792   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2793   DeconvolutionOperatorTester()
2794     .batch_size(2)
2795     .input_size(kStridedInputHeight, kStridedInputWidth)
2796     .padding(1)
2797     .kernel_size(3, 3)
2798     .stride(2)
2799     .group_input_channels(15)
2800     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2801     .use_weights_cache(true)
2802     .iterations(3)
2803     .TestQS8();
2804 }
2805 
2806 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
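// The grouped, batched tests below target the SUBCONV2D/IGEMM path. A 3x3 kernel with
// stride 2 overlaps itself in the output, so the deconvolution is presumably decomposed
// into per-output-offset subconvolutions evaluated with an indirect GEMM (IGEMM).
// Assuming the usual transposed-convolution size formula
//   out = (in - 1) * stride + kernel - padding_total + adjustment,
// the 6x5 strided input with padding 1 maps to an 11x9 output:
// (6-1)*2 + 3 - 2 = 11 and (5-1)*2 + 3 - 2 = 9.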
2807 
2808 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2) {
2809   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2810   DeconvolutionOperatorTester()
2811     .batch_size(2)
2812     .input_size(kStridedInputHeight, kStridedInputWidth)
2813     .padding(1)
2814     .kernel_size(3, 3)
2815     .stride(2)
2816     .groups(2)
2817     .group_input_channels(17)
2818     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2819     .iterations(3)
2820     .TestQS8();
2821 }
2822 
2823 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_Kx3s2) {
2824   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2825   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
2826     DeconvolutionOperatorTester()
2827       .batch_size(2)
2828       .input_size(kStridedInputHeight, kStridedInputWidth)
2829       .padding_width(1)
2830       .kernel_size(kernel_height, 3)
2831       .stride(2)
2832       .groups(2)
2833       .group_input_channels(17)
2834       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2835       .iterations(3)
2836       .TestQS8();
2837   }
2838 }
2839 
2840 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3xKs2) {
2841   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2842   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
2843     DeconvolutionOperatorTester()
2844       .batch_size(2)
2845       .input_size(kStridedInputHeight, kStridedInputWidth)
2846       .padding_height(1)
2847       .kernel_size(3, kernel_width)
2848       .stride(2)
2849       .groups(2)
2850       .group_input_channels(17)
2851       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2852       .iterations(3)
2853       .TestQS8();
2854   }
2855 }
2856 
2857 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3sSx1) {
2858   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2859   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
2860     DeconvolutionOperatorTester()
2861       .batch_size(2)
2862       .input_size(kStridedInputHeight, kStridedInputWidth)
2863       .padding(1)
2864       .padding_width(1)
2865       .kernel_size(3, 3)
2866       .stride_height(stride_height)
2867       .groups(2)
2868       .group_input_channels(17)
2869       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2870       .iterations(3)
2871       .TestQS8();
2872   }
2873 }
2874 
2875 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s1xS) {
2876   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2877   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
2878     DeconvolutionOperatorTester()
2879       .batch_size(2)
2880       .input_size(kStridedInputHeight, kStridedInputWidth)
2881       .padding(1)
2882       .padding_width(1)
2883       .kernel_size(3, 3)
2884       .stride_width(stride_width)
2885       .groups(2)
2886       .group_input_channels(17)
2887       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2888       .iterations(3)
2889       .TestQS8();
2890   }
2891 }
2892 
2893 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_height_padding) {
2894   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2895   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
2896     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
2897       DeconvolutionOperatorTester()
2898         .batch_size(2)
2899         .input_size(kStridedInputHeight, kStridedInputWidth)
2900         .padding_width(1)
2901         .padding_top(padding_top)
2902         .padding_bottom(padding_bottom)
2903         .kernel_size(3, 3)
2904         .stride(2)
2905         .groups(2)
2906         .group_input_channels(17)
2907         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2908         .iterations(1)
2909         .TestQS8();
2910     }
2911   }
2912 }
2913 
2914 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_width_padding) {
2915   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2916   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
2917     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
2918       DeconvolutionOperatorTester()
2919         .batch_size(2)
2920         .input_size(kStridedInputHeight, kStridedInputWidth)
2921         .padding_height(1)
2922         .padding_left(padding_left)
2923         .padding_right(padding_right)
2924         .kernel_size(3, 3)
2925         .stride(2)
2926         .groups(2)
2927         .group_input_channels(17)
2928         .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2929         .iterations(1)
2930         .TestQS8();
2931     }
2932   }
2933 }
2934 
2935 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_height_adjustment) {
2936   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2937   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
2938     DeconvolutionOperatorTester()
2939       .batch_size(2)
2940       .input_size(kStridedInputHeight, kStridedInputWidth)
2941       .padding(1)
2942       .adjustment_height(adjustment_height)
2943       .kernel_size(3, 3)
2944       .stride(2)
2945       .groups(2)
2946       .group_input_channels(17)
2947       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2948       .iterations(1)
2949       .TestQS8();
2950   }
2951 }
2952 
2953 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_width_adjustment) {
2954   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2955   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
2956     DeconvolutionOperatorTester()
2957       .batch_size(2)
2958       .input_size(kStridedInputHeight, kStridedInputWidth)
2959       .padding(1)
2960       .adjustment_width(adjustment_width)
2961       .kernel_size(3, 3)
2962       .stride(2)
2963       .groups(2)
2964       .group_input_channels(17)
2965       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2966       .iterations(1)
2967       .TestQS8();
2968   }
2969 }
2970 
2971 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_input_height) {
2972   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2973   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
2974     DeconvolutionOperatorTester()
2975       .batch_size(2)
2976       .input_size(input_height, kStridedInputWidth)
2977       .padding(1)
2978       .kernel_size(3, 3)
2979       .stride(2)
2980       .groups(2)
2981       .group_input_channels(17)
2982       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
2983       .iterations(1)
2984       .TestQS8();
2985   }
2986 }
2987 
2988 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_input_width) {
2989   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2990   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
2991     DeconvolutionOperatorTester()
2992       .batch_size(2)
2993       .input_size(kStridedInputHeight, input_width)
2994       .padding(1)
2995       .kernel_size(3, 3)
2996       .stride(2)
2997       .groups(2)
2998       .group_input_channels(17)
2999       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3000       .iterations(1)
3001       .TestQS8();
3002   }
3003 }
3004 
3005 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_input_channels) {
3006   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3007   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
3008     DeconvolutionOperatorTester()
3009       .batch_size(2)
3010       .input_size(kStridedInputHeight, kStridedInputWidth)
3011       .padding(1)
3012       .kernel_size(3, 3)
3013       .stride(2)
3014       .groups(2)
3015       .group_input_channels(input_channels)
3016       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3017       .iterations(1)
3018       .TestQS8();
3019   }
3020 }
3021 
3022 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_varying_output_channels) {
3023   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3024   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3025     DeconvolutionOperatorTester()
3026       .batch_size(2)
3027       .input_size(kStridedInputHeight, kStridedInputWidth)
3028       .padding(1)
3029       .kernel_size(3, 3)
3030       .stride(2)
3031       .groups(2)
3032       .group_input_channels(17)
3033       .group_output_channels(output_channels)
3034       .iterations(1)
3035       .TestQS8();
3036   }
3037 }
3038 
3039 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_input_stride) {
3040   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3041   DeconvolutionOperatorTester()
3042     .batch_size(2)
3043     .input_size(kStridedInputHeight, kStridedInputWidth)
3044     .padding(1)
3045     .kernel_size(3, 3)
3046     .stride(2)
3047     .groups(2)
3048     .group_input_channels(17)
3049     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3050     .input_pixel_stride(37)
3051     .iterations(3)
3052     .TestQS8();
3053 }
3054 
3055 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_output_stride) {
3056   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3057   DeconvolutionOperatorTester()
3058     .batch_size(2)
3059     .input_size(kStridedInputHeight, kStridedInputWidth)
3060     .padding(1)
3061     .kernel_size(3, 3)
3062     .stride(2)
3063     .groups(2)
3064     .group_input_channels(17)
3065     .group_output_channels(xnn_params.qs8.gemm.nr + 3)
3066     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3067     .iterations(3)
3068     .TestQS8();
3069 }
3070 
3071 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_qmin) {
3072   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3073   DeconvolutionOperatorTester()
3074     .batch_size(2)
3075     .input_size(kStridedInputHeight, kStridedInputWidth)
3076     .padding(1)
3077     .kernel_size(3, 3)
3078     .stride(2)
3079     .groups(2)
3080     .group_input_channels(17)
3081     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3082     .qmin(128)
3083     .iterations(3)
3084     .TestQS8();
3085 }
3086 
3087 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_with_qmax) {
3088   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3089   DeconvolutionOperatorTester()
3090     .batch_size(2)
3091     .input_size(kStridedInputHeight, kStridedInputWidth)
3092     .padding(1)
3093     .kernel_size(3, 3)
3094     .stride(2)
3095     .groups(2)
3096     .group_input_channels(17)
3097     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3098     .qmax(128)
3099     .iterations(3)
3100     .TestQS8();
3101 }
3102 
3103 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_3x3s2_without_bias) {
3104   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3105   DeconvolutionOperatorTester()
3106     .has_bias(false)
3107     .batch_size(2)
3108     .input_size(kStridedInputHeight, kStridedInputWidth)
3109     .padding(1)
3110     .kernel_size(3, 3)
3111     .stride(2)
3112     .groups(2)
3113     .group_input_channels(17)
3114     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3115     .iterations(3)
3116     .TestQS8();
3117 }
3118 
3119 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_grouped_3x3s2) {
3120   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3121   DeconvolutionOperatorTester()
3122     .batch_size(2)
3123     .input_size(kStridedInputHeight, kStridedInputWidth)
3124     .padding(1)
3125     .kernel_size(3, 3)
3126     .stride(2)
3127     .groups(2)
3128     .group_input_channels(17)
3129     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3130     .use_weights_cache(true)
3131     .iterations(3)
3132     .TestQS8();
3133 }
3134 
3135 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
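// Setup tests: each test creates the operator for one shape and then calls
// TestSetupQS8(), which presumably runs the operator, re-does setup with the changed
// batch size, input height, or input width declared via the next_*() setters, and
// checks that the second run still produces correct results without recreating the
// operator.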
3136 
3137 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_setup_changing_batch) {
3138   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3139   DeconvolutionOperatorTester()
3140     .batch_size(2)
3141     .next_batch_size(5)
3142     .input_size(kStridedInputHeight, kStridedInputWidth)
3143     .kernel_height(3)
3144     .kernel_width(5)
3145     .stride(2)
3146     .groups(2)
3147     .group_input_channels(15)
3148     .group_output_channels(17)
3149     .TestSetupQS8();
3150 }
3151 
3152 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_setup_changing_height) {
3153   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3154   DeconvolutionOperatorTester()
3155     .batch_size(2)
3156     .input_size(kStridedInputHeight, kStridedInputWidth)
3157     .next_input_height(kStridedInputHeight + 3)
3158     .kernel_height(3)
3159     .kernel_width(5)
3160     .stride(2)
3161     .groups(2)
3162     .group_input_channels(15)
3163     .group_output_channels(17)
3164     .TestSetupQS8();
3165 }
3166 
3167 TEST(DECONVOLUTION_NHWC_QS8, 3x3s2_setup_changing_width) {
3168   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3169   DeconvolutionOperatorTester()
3170     .batch_size(2)
3171     .input_size(kStridedInputHeight, kStridedInputWidth)
3172     .next_input_width(kStridedInputWidth + 3)
3173     .kernel_height(3)
3174     .kernel_width(5)
3175     .stride(2)
3176     .groups(2)
3177     .group_input_channels(15)
3178     .group_output_channels(17)
3179     .TestSetupQS8();
3180 }
3181 
3182 /**************************** SUBCONV2D/GEMM path ****************************/
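// SUBCONV2D/GEMM path: when the stride equals the kernel size in both dimensions
// (e.g. 2x2 with stride 2), the output tiles written by neighboring input pixels do
// not overlap, so each subconvolution presumably reduces to a plain GEMM with no
// indirection buffer. Under the same size formula as above, the 6x5 strided input
// maps to a 12x10 output ((6-1)*2 + 2 = 12, (5-1)*2 + 2 = 10), with every input pixel
// owning exactly one 2x2 output tile.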
3183 
3184 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2) {
3185   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3186   DeconvolutionOperatorTester()
3187     .input_size(kStridedInputHeight, kStridedInputWidth)
3188     .kernel_size(2, 2)
3189     .stride(2)
3190     .group_input_channels(15)
3191     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3192     .iterations(3)
3193     .TestQS8();
3194 }
3195 
3196 TEST(DECONVOLUTION_NHWC_QS8, Kx2sKx2) {
3197   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3198   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3199     DeconvolutionOperatorTester()
3200       .input_size(kStridedInputHeight, kStridedInputWidth)
3201       .kernel_size(kernel_height, 2)
3202       .stride(kernel_height, 2)
3203       .group_input_channels(17)
3204       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3205       .iterations(3)
3206       .TestQS8();
3207   }
3208 }
3209 
3210 TEST(DECONVOLUTION_NHWC_QS8, 2xKs2xK) {
3211   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3212   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3213     DeconvolutionOperatorTester()
3214       .input_size(kStridedInputHeight, kStridedInputWidth)
3215       .kernel_size(2, kernel_width)
3216       .stride(2, kernel_width)
3217       .group_input_channels(17)
3218       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3219       .iterations(3)
3220       .TestQS8();
3221   }
3222 }
3223 
3224 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_height_adjustment) {
3225   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3226   DeconvolutionOperatorTester()
3227     .input_size(kStridedInputHeight, kStridedInputWidth)
3228     .adjustment_height(1)
3229     .kernel_size(2, 2)
3230     .stride(2)
3231     .group_input_channels(15)
3232     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3233     .iterations(1)
3234     .TestQS8();
3235 }
3236 
3237 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_width_adjustment) {
3238   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3239   DeconvolutionOperatorTester()
3240     .input_size(kStridedInputHeight, kStridedInputWidth)
3241     .adjustment_width(1)
3242     .kernel_size(2, 2)
3243     .stride(2)
3244     .group_input_channels(15)
3245     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3246     .iterations(1)
3247     .TestQS8();
3248 }
3249 
3250 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_input_height) {
3251   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3252   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3253     DeconvolutionOperatorTester()
3254       .input_size(input_height, kStridedInputWidth)
3255       .kernel_size(2, 2)
3256       .stride(2)
3257       .group_input_channels(15)
3258       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3259       .iterations(1)
3260       .TestQS8();
3261   }
3262 }
3263 
3264 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_input_width) {
3265   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3266   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3267     DeconvolutionOperatorTester()
3268       .input_size(kStridedInputHeight, input_width)
3269       .kernel_size(2, 2)
3270       .stride(2)
3271       .group_input_channels(15)
3272       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3273       .iterations(1)
3274       .TestQS8();
3275   }
3276 }
3277 
3278 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_input_channels) {
3279   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3280   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
3281     DeconvolutionOperatorTester()
3282       .input_size(kStridedInputHeight, kStridedInputWidth)
3283       .kernel_size(2, 2)
3284       .stride(2)
3285       .group_input_channels(input_channels)
3286       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3287       .iterations(1)
3288       .TestQS8();
3289   }
3290 }
3291 
3292 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_varying_output_channels) {
3293   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3294   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3295     DeconvolutionOperatorTester()
3296       .input_size(kStridedInputHeight, kStridedInputWidth)
3297       .kernel_size(2, 2)
3298       .stride(2)
3299       .group_input_channels(23)
3300       .group_output_channels(output_channels)
3301       .iterations(1)
3302       .TestQS8();
3303   }
3304 }
3305 
3306 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_input_stride) {
3307   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3308   DeconvolutionOperatorTester()
3309     .input_size(kStridedInputHeight, kStridedInputWidth)
3310     .kernel_size(2, 2)
3311     .stride(2)
3312     .group_input_channels(23)
3313     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3314     .input_pixel_stride(28)
3315     .iterations(3)
3316     .TestQS8();
3317 }
3318 
3319 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_output_stride) {
3320   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3321   DeconvolutionOperatorTester()
3322     .input_size(kStridedInputHeight, kStridedInputWidth)
3323     .kernel_size(2, 2)
3324     .stride(2)
3325     .group_input_channels(23)
3326     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3327     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3328     .iterations(3)
3329     .TestQS8();
3330 }
3331 
3332 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_qmin) {
3333   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3334   DeconvolutionOperatorTester()
3335     .input_size(kStridedInputHeight, kStridedInputWidth)
3336     .kernel_size(2, 2)
3337     .stride(2)
3338     .group_input_channels(23)
3339     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3340     .qmin(128)
3341     .iterations(3)
3342     .TestQS8();
3343 }
3344 
3345 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_with_qmax) {
3346   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3347   DeconvolutionOperatorTester()
3348     .input_size(kStridedInputHeight, kStridedInputWidth)
3349     .kernel_size(2, 2)
3350     .stride(2)
3351     .group_input_channels(23)
3352     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3353     .qmax(128)
3354     .iterations(3)
3355     .TestQS8();
3356 }
3357 
3358 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_without_bias) {
3359   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3360   DeconvolutionOperatorTester()
3361     .has_bias(false)
3362     .input_size(kStridedInputHeight, kStridedInputWidth)
3363     .kernel_size(2, 2)
3364     .stride(2)
3365     .group_input_channels(23)
3366     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3367     .iterations(3)
3368     .TestQS8();
3369 }
3370 
3371 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_2x2s2) {
3372   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3373   DeconvolutionOperatorTester()
3374     .input_size(kStridedInputHeight, kStridedInputWidth)
3375     .kernel_size(2, 2)
3376     .stride(2)
3377     .group_input_channels(15)
3378     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3379     .use_weights_cache(true)
3380     .iterations(3)
3381     .TestQS8();
3382 }
3383 
3384 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
3385 
3386 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2) {
3387   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3388   DeconvolutionOperatorTester()
3389     .input_size(kStridedInputHeight, kStridedInputWidth)
3390     .kernel_size(2, 2)
3391     .stride(2)
3392     .groups(2)
3393     .group_input_channels(17)
3394     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3395     .iterations(3)
3396     .TestQS8();
3397 }
3398 
3399 TEST(DECONVOLUTION_NHWC_QS8, grouped_Kx2sKx2) {
3400   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3401   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3402     DeconvolutionOperatorTester()
3403       .input_size(kStridedInputHeight, kStridedInputWidth)
3404       .kernel_size(kernel_height, 2)
3405       .stride(kernel_height, 2)
3406       .groups(2)
3407       .group_input_channels(17)
3408       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3409       .iterations(3)
3410       .TestQS8();
3411   }
3412 }
3413 
3414 TEST(DECONVOLUTION_NHWC_QS8, grouped_2xKs2xK) {
3415   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3416   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3417     DeconvolutionOperatorTester()
3418       .input_size(kStridedInputHeight, kStridedInputWidth)
3419       .kernel_size(2, kernel_width)
3420       .stride(2, kernel_width)
3421       .groups(2)
3422       .group_input_channels(17)
3423       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3424       .iterations(3)
3425       .TestQS8();
3426   }
3427 }
3428 
3429 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_height_adjustment) {
3430   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3431   DeconvolutionOperatorTester()
3432     .input_size(kStridedInputHeight, kStridedInputWidth)
3433     .adjustment_height(1)
3434     .kernel_size(2, 2)
3435     .stride(2)
3436     .groups(2)
3437     .group_input_channels(17)
3438     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3439     .iterations(1)
3440     .TestQS8();
3441 }
3442 
3443 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_width_adjustment) {
3444   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3445   DeconvolutionOperatorTester()
3446     .input_size(kStridedInputHeight, kStridedInputWidth)
3447     .adjustment_width(1)
3448     .kernel_size(2, 2)
3449     .stride(2)
3450     .groups(2)
3451     .group_input_channels(17)
3452     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3453     .iterations(1)
3454     .TestQS8();
3455 }
3456 
3457 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_input_height) {
3458   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3459   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3460     DeconvolutionOperatorTester()
3461       .input_size(input_height, kStridedInputWidth)
3462       .kernel_size(2, 2)
3463       .stride(2)
3464       .groups(2)
3465       .group_input_channels(17)
3466       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3467       .iterations(1)
3468       .TestQS8();
3469   }
3470 }
3471 
3472 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_input_width) {
3473   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3474   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3475     DeconvolutionOperatorTester()
3476       .input_size(kStridedInputHeight, input_width)
3477       .kernel_size(2, 2)
3478       .stride(2)
3479       .groups(2)
3480       .group_input_channels(17)
3481       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3482       .iterations(1)
3483       .TestQS8();
3484   }
3485 }
3486 
3487 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_input_channels) {
3488   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3489   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
3490     DeconvolutionOperatorTester()
3491       .input_size(kStridedInputHeight, kStridedInputWidth)
3492       .kernel_size(2, 2)
3493       .stride(2)
3494       .groups(2)
3495       .group_input_channels(input_channels)
3496       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3497       .iterations(1)
3498       .TestQS8();
3499   }
3500 }
3501 
3502 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_varying_output_channels) {
3503   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3504   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3505     DeconvolutionOperatorTester()
3506       .input_size(kStridedInputHeight, kStridedInputWidth)
3507       .kernel_size(2, 2)
3508       .stride(2)
3509       .groups(2)
3510       .group_input_channels(17)
3511       .group_output_channels(output_channels)
3512       .iterations(1)
3513       .TestQS8();
3514   }
3515 }
3516 
3517 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_input_stride) {
3518   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3519   DeconvolutionOperatorTester()
3520     .input_size(kStridedInputHeight, kStridedInputWidth)
3521     .kernel_size(2, 2)
3522     .stride(2)
3523     .groups(2)
3524     .group_input_channels(17)
3525     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3526     .input_pixel_stride(37)
3527     .iterations(3)
3528     .TestQS8();
3529 }
3530 
3531 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_output_stride) {
3532   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3533   DeconvolutionOperatorTester()
3534     .input_size(kStridedInputHeight, kStridedInputWidth)
3535     .kernel_size(2, 2)
3536     .stride(2)
3537     .groups(2)
3538     .group_input_channels(17)
3539     .group_output_channels(xnn_params.qs8.gemm.nr + 3)
3540     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3541     .iterations(3)
3542     .TestQS8();
3543 }
3544 
3545 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_qmin) {
3546   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3547   DeconvolutionOperatorTester()
3548     .input_size(kStridedInputHeight, kStridedInputWidth)
3549     .kernel_size(2, 2)
3550     .stride(2)
3551     .groups(2)
3552     .group_input_channels(17)
3553     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3554     .qmin(128)
3555     .iterations(3)
3556     .TestQS8();
3557 }
3558 
3559 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_with_qmax) {
3560   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3561   DeconvolutionOperatorTester()
3562     .input_size(kStridedInputHeight, kStridedInputWidth)
3563     .kernel_size(2, 2)
3564     .stride(2)
3565     .groups(2)
3566     .group_input_channels(17)
3567     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3568     .qmax(128)
3569     .iterations(3)
3570     .TestQS8();
3571 }
3572 
3573 TEST(DECONVOLUTION_NHWC_QS8, grouped_2x2s2_without_bias) {
3574   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3575   DeconvolutionOperatorTester()
3576     .has_bias(false)
3577     .input_size(kStridedInputHeight, kStridedInputWidth)
3578     .kernel_size(2, 2)
3579     .stride(2)
3580     .groups(2)
3581     .group_input_channels(17)
3582     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3583     .iterations(3)
3584     .TestQS8();
3585 }
3586 
3587 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_grouped_2x2s2) {
3588   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3589   DeconvolutionOperatorTester()
3590     .input_size(kStridedInputHeight, kStridedInputWidth)
3591     .kernel_size(2, 2)
3592     .stride(2)
3593     .groups(2)
3594     .group_input_channels(17)
3595     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3596     .use_weights_cache(true)
3597     .iterations(3)
3598     .TestQS8();
3599 }
3600 
3601 /**************************** SUBCONV2D/GEMM path, batched ****************************/
3602 
3603 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2) {
3604   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3605   DeconvolutionOperatorTester()
3606     .batch_size(2)
3607     .input_size(kStridedInputHeight, kStridedInputWidth)
3608     .kernel_size(2, 2)
3609     .stride(2)
3610     .group_input_channels(15)
3611     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3612     .iterations(3)
3613     .TestQS8();
3614 }
3615 
3616 TEST(DECONVOLUTION_NHWC_QS8, batched_Kx2sKx2) {
3617   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3618   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3619     DeconvolutionOperatorTester()
3620       .batch_size(2)
3621       .input_size(kStridedInputHeight, kStridedInputWidth)
3622       .kernel_size(kernel_height, 2)
3623       .stride(kernel_height, 2)
3624       .group_input_channels(17)
3625       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3626       .iterations(3)
3627       .TestQS8();
3628   }
3629 }
3630 
3631 TEST(DECONVOLUTION_NHWC_QS8, batched_2xKs2xK) {
3632   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3633   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3634     DeconvolutionOperatorTester()
3635       .batch_size(2)
3636       .input_size(kStridedInputHeight, kStridedInputWidth)
3637       .kernel_size(2, kernel_width)
3638       .stride(2, kernel_width)
3639       .group_input_channels(17)
3640       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3641       .iterations(3)
3642       .TestQS8();
3643   }
3644 }
3645 
3646 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_height_adjustment) {
3647   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3648   DeconvolutionOperatorTester()
3649     .batch_size(2)
3650     .input_size(kStridedInputHeight, kStridedInputWidth)
3651     .adjustment_height(1)
3652     .kernel_size(2, 2)
3653     .stride(2)
3654     .group_input_channels(15)
3655     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3656     .iterations(1)
3657     .TestQS8();
3658 }
3659 
3660 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_width_adjustment) {
3661   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3662   DeconvolutionOperatorTester()
3663     .batch_size(2)
3664     .input_size(kStridedInputHeight, kStridedInputWidth)
3665     .adjustment_width(1)
3666     .kernel_size(2, 2)
3667     .stride(2)
3668     .group_input_channels(15)
3669     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3670     .iterations(1)
3671     .TestQS8();
3672 }
3673 
3674 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_input_height) {
3675   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3676   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3677     DeconvolutionOperatorTester()
3678       .batch_size(2)
3679       .input_size(input_height, kStridedInputWidth)
3680       .kernel_size(2, 2)
3681       .stride(2)
3682       .group_input_channels(15)
3683       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3684       .iterations(1)
3685       .TestQS8();
3686   }
3687 }
3688 
3689 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_input_width) {
3690   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3691   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3692     DeconvolutionOperatorTester()
3693       .batch_size(2)
3694       .input_size(kStridedInputHeight, input_width)
3695       .kernel_size(2, 2)
3696       .stride(2)
3697       .group_input_channels(15)
3698       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3699       .iterations(1)
3700       .TestQS8();
3701   }
3702 }
3703 
3704 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_input_channels) {
3705   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3706   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
3707     DeconvolutionOperatorTester()
3708       .batch_size(2)
3709       .input_size(kStridedInputHeight, kStridedInputWidth)
3710       .kernel_size(2, 2)
3711       .stride(2)
3712       .group_input_channels(input_channels)
3713       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3714       .iterations(1)
3715       .TestQS8();
3716   }
3717 }
3718 
3719 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_varying_output_channels) {
3720   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3721   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3722     DeconvolutionOperatorTester()
3723       .batch_size(2)
3724       .input_size(kStridedInputHeight, kStridedInputWidth)
3725       .kernel_size(2, 2)
3726       .stride(2)
3727       .group_input_channels(23)
3728       .group_output_channels(output_channels)
3729       .iterations(1)
3730       .TestQS8();
3731   }
3732 }
3733 
3734 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_input_stride) {
3735   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3736   DeconvolutionOperatorTester()
3737     .batch_size(2)
3738     .input_size(kStridedInputHeight, kStridedInputWidth)
3739     .kernel_size(2, 2)
3740     .stride(2)
3741     .group_input_channels(23)
3742     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3743     .input_pixel_stride(28)
3744     .iterations(3)
3745     .TestQS8();
3746 }
3747 
3748 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_output_stride) {
3749   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3750   DeconvolutionOperatorTester()
3751     .batch_size(2)
3752     .input_size(kStridedInputHeight, kStridedInputWidth)
3753     .kernel_size(2, 2)
3754     .stride(2)
3755     .group_input_channels(23)
3756     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3757     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3758     .iterations(3)
3759     .TestQS8();
3760 }
3761 
3762 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_qmin) {
3763   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3764   DeconvolutionOperatorTester()
3765     .batch_size(2)
3766     .input_size(kStridedInputHeight, kStridedInputWidth)
3767     .kernel_size(2, 2)
3768     .stride(2)
3769     .group_input_channels(23)
3770     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3771     .qmin(128)
3772     .iterations(3)
3773     .TestQS8();
3774 }
3775 
3776 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_with_qmax) {
3777   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3778   DeconvolutionOperatorTester()
3779     .batch_size(2)
3780     .input_size(kStridedInputHeight, kStridedInputWidth)
3781     .kernel_size(2, 2)
3782     .stride(2)
3783     .group_input_channels(23)
3784     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3785     .qmax(128)
3786     .iterations(3)
3787     .TestQS8();
3788 }
3789 
3790 TEST(DECONVOLUTION_NHWC_QS8, batched_2x2s2_without_bias) {
3791   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3792   DeconvolutionOperatorTester()
3793     .has_bias(false)
3794     .batch_size(2)
3795     .input_size(kStridedInputHeight, kStridedInputWidth)
3796     .kernel_size(2, 2)
3797     .stride(2)
3798     .group_input_channels(23)
3799     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3800     .iterations(3)
3801     .TestQS8();
3802 }
3803 
3804 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_2x2s2) {
3805   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3806   DeconvolutionOperatorTester()
3807     .batch_size(2)
3808     .input_size(kStridedInputHeight, kStridedInputWidth)
3809     .kernel_size(2, 2)
3810     .stride(2)
3811     .group_input_channels(15)
3812     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3813     .use_weights_cache(true)
3814     .iterations(3)
3815     .TestQS8();
3816 }
3817 
3818 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
3819 
3820 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2) {
3821   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3822   DeconvolutionOperatorTester()
3823     .batch_size(2)
3824     .input_size(kStridedInputHeight, kStridedInputWidth)
3825     .kernel_size(2, 2)
3826     .stride(2)
3827     .groups(2)
3828     .group_input_channels(17)
3829     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3830     .iterations(3)
3831     .TestQS8();
3832 }
3833 
3834 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_Kx2sKx2) {
3835   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3836   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
3837     DeconvolutionOperatorTester()
3838       .batch_size(2)
3839       .input_size(kStridedInputHeight, kStridedInputWidth)
3840       .kernel_size(kernel_height, 2)
3841       .stride(kernel_height, 2)
3842       .groups(2)
3843       .group_input_channels(17)
3844       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3845       .iterations(3)
3846       .TestQS8();
3847   }
3848 }
3849 
3850 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2xKs2xK) {
3851   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3852   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
3853     DeconvolutionOperatorTester()
3854       .batch_size(2)
3855       .input_size(kStridedInputHeight, kStridedInputWidth)
3856       .kernel_size(2, kernel_width)
3857       .stride(2, kernel_width)
3858       .groups(2)
3859       .group_input_channels(17)
3860       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3861       .iterations(3)
3862       .TestQS8();
3863   }
3864 }
3865 
3866 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_height_adjustment) {
3867   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3868   DeconvolutionOperatorTester()
3869     .batch_size(2)
3870     .input_size(kStridedInputHeight, kStridedInputWidth)
3871     .adjustment_height(1)
3872     .kernel_size(2, 2)
3873     .stride(2)
3874     .groups(2)
3875     .group_input_channels(17)
3876     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3877     .iterations(1)
3878     .TestQS8();
3879 }
3880 
3881 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_width_adjustment) {
3882   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3883   DeconvolutionOperatorTester()
3884     .batch_size(2)
3885     .input_size(kStridedInputHeight, kStridedInputWidth)
3886     .adjustment_width(1)
3887     .kernel_size(2, 2)
3888     .stride(2)
3889     .groups(2)
3890     .group_input_channels(17)
3891     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3892     .iterations(1)
3893     .TestQS8();
3894 }
3895 
3896 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_input_height) {
3897   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3898   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
3899     DeconvolutionOperatorTester()
3900       .batch_size(2)
3901       .input_size(input_height, kStridedInputWidth)
3902       .kernel_size(2, 2)
3903       .stride(2)
3904       .groups(2)
3905       .group_input_channels(17)
3906       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3907       .iterations(1)
3908       .TestQS8();
3909   }
3910 }
3911 
3912 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_input_width) {
3913   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3914   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
3915     DeconvolutionOperatorTester()
3916       .batch_size(2)
3917       .input_size(kStridedInputHeight, input_width)
3918       .kernel_size(2, 2)
3919       .stride(2)
3920       .groups(2)
3921       .group_input_channels(17)
3922       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3923       .iterations(1)
3924       .TestQS8();
3925   }
3926 }
3927 
3928 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_input_channels) {
3929   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3930   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
3931     DeconvolutionOperatorTester()
3932       .batch_size(2)
3933       .input_size(kStridedInputHeight, kStridedInputWidth)
3934       .kernel_size(2, 2)
3935       .stride(2)
3936       .groups(2)
3937       .group_input_channels(input_channels)
3938       .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3939       .iterations(1)
3940       .TestQS8();
3941   }
3942 }
3943 
3944 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_varying_output_channels) {
3945   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3946   for (size_t output_channels = 1; output_channels <= xnn_params.qs8.gemm.nr * 2; output_channels *= 2) {
3947     DeconvolutionOperatorTester()
3948       .batch_size(2)
3949       .input_size(kStridedInputHeight, kStridedInputWidth)
3950       .kernel_size(2, 2)
3951       .stride(2)
3952       .groups(2)
3953       .group_input_channels(17)
3954       .group_output_channels(output_channels)
3955       .iterations(1)
3956       .TestQS8();
3957   }
3958 }
3959 
3960 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_input_stride) {
3961   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3962   DeconvolutionOperatorTester()
3963     .batch_size(2)
3964     .input_size(kStridedInputHeight, kStridedInputWidth)
3965     .kernel_size(2, 2)
3966     .stride(2)
3967     .groups(2)
3968     .group_input_channels(17)
3969     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
3970     .input_pixel_stride(37)
3971     .iterations(3)
3972     .TestQS8();
3973 }
3974 
3975 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_output_stride) {
3976   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3977   DeconvolutionOperatorTester()
3978     .batch_size(2)
3979     .input_size(kStridedInputHeight, kStridedInputWidth)
3980     .kernel_size(2, 2)
3981     .stride(2)
3982     .groups(2)
3983     .group_input_channels(17)
3984     .group_output_channels(xnn_params.qs8.gemm.nr + 3)
3985     .output_pixel_stride(xnn_params.qs8.gemm.nr * 2 + 13)
3986     .iterations(3)
3987     .TestQS8();
3988 }
3989 
3990 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_qmin) {
3991   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
3992   DeconvolutionOperatorTester()
3993     .batch_size(2)
3994     .input_size(kStridedInputHeight, kStridedInputWidth)
3995     .kernel_size(2, 2)
3996     .stride(2)
3997     .groups(2)
3998     .group_input_channels(17)
3999     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4000     .qmin(128)
4001     .iterations(3)
4002     .TestQS8();
4003 }
4004 
4005 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_with_qmax) {
4006   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4007   DeconvolutionOperatorTester()
4008     .batch_size(2)
4009     .input_size(kStridedInputHeight, kStridedInputWidth)
4010     .kernel_size(2, 2)
4011     .stride(2)
4012     .groups(2)
4013     .group_input_channels(17)
4014     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4015     .qmax(128)
4016     .iterations(3)
4017     .TestQS8();
4018 }
4019 
4020 TEST(DECONVOLUTION_NHWC_QS8, batched_grouped_2x2s2_without_bias) {
4021   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4022   DeconvolutionOperatorTester()
4023     .has_bias(false)
4024     .batch_size(2)
4025     .input_size(kStridedInputHeight, kStridedInputWidth)
4026     .kernel_size(2, 2)
4027     .stride(2)
4028     .groups(2)
4029     .group_input_channels(17)
4030     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4031     .iterations(3)
4032     .TestQS8();
4033 }
4034 
4035 TEST(DECONVOLUTION_NHWC_QS8, weights_cache_batched_grouped_2x2s2) {
4036   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4037   DeconvolutionOperatorTester()
4038     .batch_size(2)
4039     .input_size(kStridedInputHeight, kStridedInputWidth)
4040     .kernel_size(2, 2)
4041     .stride(2)
4042     .groups(2)
4043     .group_input_channels(17)
4044     .group_output_channels(xnn_params.qs8.gemm.nr * 2 + 3)
4045     .use_weights_cache(true)
4046     .iterations(3)
4047     .TestQS8();
4048 }
4049 
4050 /**************************** SUBCONV2D/GEMM path, setup ****************************/
4051 
4052 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_setup_changing_batch) {
4053   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4054   DeconvolutionOperatorTester()
4055     .batch_size(2)
4056     .next_batch_size(5)
4057     .input_size(kStridedInputHeight, kStridedInputWidth)
4058     .kernel_size(2, 2)
4059     .stride(2)
4060     .groups(2)
4061     .group_input_channels(15)
4062     .group_output_channels(17)
4063     .TestSetupQS8();
4064 }
4065 
4066 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_setup_changing_height) {
4067   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4068   DeconvolutionOperatorTester()
4069     .batch_size(2)
4070     .input_size(kStridedInputHeight, kStridedInputWidth)
4071     .next_input_height(kStridedInputHeight + 3)
4072     .kernel_size(2, 2)
4073     .stride(2)
4074     .groups(2)
4075     .group_input_channels(15)
4076     .group_output_channels(17)
4077     .TestSetupQS8();
4078 }
4079 
4080 TEST(DECONVOLUTION_NHWC_QS8, 2x2s2_setup_changing_width) {
4081   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4082   DeconvolutionOperatorTester()
4083     .batch_size(2)
4084     .input_size(kStridedInputHeight, kStridedInputWidth)
4085     .next_input_width(kStridedInputWidth + 3)
4086     .kernel_size(2, 2)
4087     .stride(2)
4088     .groups(2)
4089     .group_input_channels(15)
4090     .group_output_channels(17)
4091     .TestSetupQS8();
4092 }
4093 
4094 /**************************** Future GEMM path ****************************/
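// QU8 coverage mirrors the QS8 tests above, switching the tester to TestQU8() and the
// tile parameters to xnn_params.qu8.gemm.nr. The "Future GEMM path" label presumably
// marks the 1x1, stride-1 cases, which are mathematically equivalent to a 1x1
// convolution and could eventually be lowered to a single dense GEMM over all pixels.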
4095 
4096 TEST(DECONVOLUTION_NHWC_QU8, 1x1) {
4097   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4098   DeconvolutionOperatorTester()
4099     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4100     .kernel_size(1, 1)
4101     .group_input_channels(23)
4102     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4103     .iterations(3)
4104     .TestQU8();
4105 }
4106 
4107 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_input_width) {
4108   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4109   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4110     DeconvolutionOperatorTester()
4111       .input_size(input_height, kUnstridedInputWidth)
4112       .kernel_size(1, 1)
4113       .group_input_channels(23)
4114       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4115       .iterations(1)
4116       .TestQU8();
4117   }
4118 }
4119 
4120 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_input_height) {
4121   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4122   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4123     DeconvolutionOperatorTester()
4124       .input_size(kUnstridedInputHeight, input_width)
4125       .kernel_size(1, 1)
4126       .group_input_channels(23)
4127       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4128       .iterations(1)
4129       .TestQU8();
4130   }
4131 }
4132 
4133 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_input_channels) {
4134   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4135   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4136     DeconvolutionOperatorTester()
4137       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4138       .kernel_size(1, 1)
4139       .group_input_channels(input_channels)
4140       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4141       .iterations(1)
4142       .TestQU8();
4143   }
4144 }
4145 
4146 TEST(DECONVOLUTION_NHWC_QU8, 1x1_varying_output_channels) {
4147   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4148   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4149     DeconvolutionOperatorTester()
4150       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4151       .kernel_size(1, 1)
4152       .group_input_channels(23)
4153       .group_output_channels(output_channels)
4154       .iterations(1)
4155       .TestQU8();
4156   }
4157 }
4158 
4159 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_input_stride) {
4160   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4161   DeconvolutionOperatorTester()
4162     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4163     .kernel_size(1, 1)
4164     .group_input_channels(23)
4165     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4166     .input_pixel_stride(28)
4167     .iterations(3)
4168     .TestQU8();
4169 }
4170 
4171 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_output_stride) {
4172   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4173   DeconvolutionOperatorTester()
4174     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4175     .kernel_size(1, 1)
4176     .group_input_channels(23)
4177     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4178     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4179     .iterations(3)
4180     .TestQU8();
4181 }
4182 
4183 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_qmin) {
4184   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4185   DeconvolutionOperatorTester()
4186     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4187     .kernel_size(1, 1)
4188     .group_input_channels(23)
4189     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4190     .qmin(128)
4191     .iterations(3)
4192     .TestQU8();
4193 }
4194 
4195 TEST(DECONVOLUTION_NHWC_QU8, 1x1_with_qmax) {
4196   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4197   DeconvolutionOperatorTester()
4198     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4199     .kernel_size(1, 1)
4200     .group_input_channels(23)
4201     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4202     .qmax(128)
4203     .iterations(3)
4204     .TestQU8();
4205 }
4206 
4207 TEST(DECONVOLUTION_NHWC_QU8, 1x1_without_bias) {
4208   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4209   DeconvolutionOperatorTester()
4210     .has_bias(false)
4211     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4212     .kernel_size(1, 1)
4213     .group_input_channels(23)
4214     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4215     .iterations(3)
4216     .TestQU8();
4217 }
4218 
4219 /**************************** Future GEMM path, grouped ****************************/
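// Same 1x1 unit-stride cases as above, repeated with groups(2) so each group runs as an independent deconvolution.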
4220 
4221 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1) {
4222   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4223   DeconvolutionOperatorTester()
4224     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4225     .kernel_size(1, 1)
4226     .groups(2)
4227     .group_input_channels(23)
4228     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4229     .iterations(3)
4230     .TestQU8();
4231 }
4232 
4233 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_input_width) {
4234   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4235   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4236     DeconvolutionOperatorTester()
4237       .input_size(input_height, kUnstridedInputWidth)
4238       .kernel_size(1, 1)
4239       .groups(2)
4240       .group_input_channels(23)
4241       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4242       .iterations(1)
4243       .TestQU8();
4244   }
4245 }
4246 
4247 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_input_height) {
4248   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4249   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4250     DeconvolutionOperatorTester()
4251       .input_size(kUnstridedInputHeight, input_width)
4252       .kernel_size(1, 1)
4253       .groups(2)
4254       .group_input_channels(23)
4255       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4256       .iterations(1)
4257       .TestQU8();
4258   }
4259 }
4260 
4261 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_input_channels) {
4262   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4263   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4264     DeconvolutionOperatorTester()
4265       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4266       .kernel_size(1, 1)
4267       .groups(2)
4268       .group_input_channels(input_channels)
4269       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4270       .iterations(1)
4271       .TestQU8();
4272   }
4273 }
4274 
4275 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_varying_output_channels) {
4276   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4277   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4278     DeconvolutionOperatorTester()
4279       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4280       .kernel_size(1, 1)
4281       .groups(2)
4282       .group_input_channels(23)
4283       .group_output_channels(output_channels)
4284       .iterations(1)
4285       .TestQU8();
4286   }
4287 }
4288 
4289 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_input_stride) {
4290   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4291   DeconvolutionOperatorTester()
4292     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4293     .kernel_size(1, 1)
4294     .groups(2)
4295     .group_input_channels(23)
4296     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4297     .input_pixel_stride(47)
4298     .iterations(3)
4299     .TestQU8();
4300 }
4301 
4302 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_output_stride) {
4303   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4304   DeconvolutionOperatorTester()
4305     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4306     .kernel_size(1, 1)
4307     .groups(2)
4308     .group_input_channels(23)
4309     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
4310     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4311     .iterations(3)
4312     .TestQU8();
4313 }
4314 
4315 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_qmin) {
4316   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4317   DeconvolutionOperatorTester()
4318     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4319     .kernel_size(1, 1)
4320     .groups(2)
4321     .group_input_channels(23)
4322     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4323     .qmin(128)
4324     .iterations(3)
4325     .TestQU8();
4326 }
4327 
4328 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_with_qmax) {
4329   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4330   DeconvolutionOperatorTester()
4331     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4332     .kernel_size(1, 1)
4333     .groups(2)
4334     .group_input_channels(23)
4335     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4336     .qmax(128)
4337     .iterations(3)
4338     .TestQU8();
4339 }
4340 
4341 TEST(DECONVOLUTION_NHWC_QU8, grouped_1x1_without_bias) {
4342   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4343   DeconvolutionOperatorTester()
4344     .has_bias(false)
4345     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4346     .kernel_size(1, 1)
4347     .groups(2)
4348     .group_input_channels(23)
4349     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4350     .iterations(3)
4351     .TestQU8();
4352 }
4353 
4354 /**************************** Future GEMM path, batched ****************************/
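// The 1x1 unit-stride cases repeated with batch_size(2) to cover batched execution of the same path.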
4355 
4356 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1) {
4357   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4358   DeconvolutionOperatorTester()
4359     .batch_size(2)
4360     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4361     .kernel_size(1, 1)
4362     .group_input_channels(23)
4363     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4364     .iterations(3)
4365     .TestQU8();
4366 }
4367 
4368 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_input_width) {
4369   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4370   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4371     DeconvolutionOperatorTester()
4372       .batch_size(2)
4373       .input_size(input_height, kUnstridedInputWidth)
4374       .kernel_size(1, 1)
4375       .group_input_channels(23)
4376       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4377       .iterations(1)
4378       .TestQU8();
4379   }
4380 }
4381 
4382 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_input_height) {
4383   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4384   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4385     DeconvolutionOperatorTester()
4386       .batch_size(2)
4387       .input_size(kUnstridedInputHeight, input_width)
4388       .kernel_size(1, 1)
4389       .group_input_channels(23)
4390       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4391       .iterations(1)
4392       .TestQU8();
4393   }
4394 }
4395 
4396 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_input_channels) {
4397   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4398   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4399     DeconvolutionOperatorTester()
4400       .batch_size(2)
4401       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4402       .kernel_size(1, 1)
4403       .group_input_channels(input_channels)
4404       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4405       .iterations(1)
4406       .TestQU8();
4407   }
4408 }
4409 
4410 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_varying_output_channels) {
4411   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4412   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4413     DeconvolutionOperatorTester()
4414       .batch_size(2)
4415       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4416       .kernel_size(1, 1)
4417       .group_input_channels(23)
4418       .group_output_channels(output_channels)
4419       .iterations(1)
4420       .TestQU8();
4421   }
4422 }
4423 
4424 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_input_stride) {
4425   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4426   DeconvolutionOperatorTester()
4427     .batch_size(2)
4428     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4429     .kernel_size(1, 1)
4430     .group_input_channels(23)
4431     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4432     .input_pixel_stride(28)
4433     .iterations(3)
4434     .TestQU8();
4435 }
4436 
4437 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_output_stride) {
4438   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4439   DeconvolutionOperatorTester()
4440     .batch_size(2)
4441     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4442     .kernel_size(1, 1)
4443     .group_input_channels(23)
4444     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4445     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4446     .iterations(3)
4447     .TestQU8();
4448 }
4449 
4450 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_qmin) {
4451   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4452   DeconvolutionOperatorTester()
4453     .batch_size(2)
4454     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4455     .kernel_size(1, 1)
4456     .group_input_channels(23)
4457     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4458     .qmin(128)
4459     .iterations(3)
4460     .TestQU8();
4461 }
4462 
4463 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_with_qmax) {
4464   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4465   DeconvolutionOperatorTester()
4466     .batch_size(2)
4467     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4468     .kernel_size(1, 1)
4469     .group_input_channels(23)
4470     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4471     .qmax(128)
4472     .iterations(3)
4473     .TestQU8();
4474 }
4475 
4476 TEST(DECONVOLUTION_NHWC_QU8, batched_1x1_without_bias) {
4477   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4478   DeconvolutionOperatorTester()
4479     .has_bias(false)
4480     .batch_size(2)
4481     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4482     .kernel_size(1, 1)
4483     .group_input_channels(23)
4484     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4485     .iterations(3)
4486     .TestQU8();
4487 }
4488 
4489 /**************************** Future GEMM path, batched, grouped ****************************/
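// The 1x1 unit-stride cases combining batch_size(2) and groups(2).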
4490 
4491 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1) {
4492   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4493   DeconvolutionOperatorTester()
4494     .batch_size(2)
4495     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4496     .kernel_size(1, 1)
4497     .groups(2)
4498     .group_input_channels(23)
4499     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4500     .iterations(3)
4501     .TestQU8();
4502 }
4503 
4504 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_input_width) {
4505   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4506   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4507     DeconvolutionOperatorTester()
4508       .batch_size(2)
4509       .input_size(input_height, kUnstridedInputWidth)
4510       .kernel_size(1, 1)
4511       .groups(2)
4512       .group_input_channels(23)
4513       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4514       .iterations(1)
4515       .TestQU8();
4516   }
4517 }
4518 
4519 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_input_height) {
4520   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4521   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4522     DeconvolutionOperatorTester()
4523       .batch_size(2)
4524       .input_size(kUnstridedInputHeight, input_width)
4525       .kernel_size(1, 1)
4526       .groups(2)
4527       .group_input_channels(23)
4528       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4529       .iterations(1)
4530       .TestQU8();
4531   }
4532 }
4533 
4534 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_input_channels) {
4535   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4536   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4537     DeconvolutionOperatorTester()
4538       .batch_size(2)
4539       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4540       .kernel_size(1, 1)
4541       .groups(2)
4542       .group_input_channels(input_channels)
4543       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4544       .iterations(1)
4545       .TestQU8();
4546   }
4547 }
4548 
4549 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_varying_output_channels) {
4550   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4551   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4552     DeconvolutionOperatorTester()
4553       .batch_size(2)
4554       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4555       .kernel_size(1, 1)
4556       .groups(2)
4557       .group_input_channels(23)
4558       .group_output_channels(output_channels)
4559       .iterations(1)
4560       .TestQU8();
4561   }
4562 }
4563 
4564 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_input_stride) {
4565   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4566   DeconvolutionOperatorTester()
4567     .batch_size(2)
4568     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4569     .kernel_size(1, 1)
4570     .groups(2)
4571     .group_input_channels(23)
4572     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4573     .input_pixel_stride(47)
4574     .iterations(3)
4575     .TestQU8();
4576 }
4577 
4578 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_output_stride) {
4579   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4580   DeconvolutionOperatorTester()
4581     .batch_size(2)
4582     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4583     .kernel_size(1, 1)
4584     .groups(2)
4585     .group_input_channels(23)
4586     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
4587     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4588     .iterations(3)
4589     .TestQU8();
4590 }
4591 
4592 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_qmin) {
4593   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4594   DeconvolutionOperatorTester()
4595     .batch_size(2)
4596     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4597     .kernel_size(1, 1)
4598     .groups(2)
4599     .group_input_channels(23)
4600     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4601     .qmin(128)
4602     .iterations(3)
4603     .TestQU8();
4604 }
4605 
4606 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_with_qmax) {
4607   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4608   DeconvolutionOperatorTester()
4609     .batch_size(2)
4610     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4611     .kernel_size(1, 1)
4612     .groups(2)
4613     .group_input_channels(23)
4614     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4615     .qmax(128)
4616     .iterations(3)
4617     .TestQU8();
4618 }
4619 
4620 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_1x1_without_bias) {
4621   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4622   DeconvolutionOperatorTester()
4623     .has_bias(false)
4624     .batch_size(2)
4625     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4626     .kernel_size(1, 1)
4627     .groups(2)
4628     .group_input_channels(23)
4629     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4630     .iterations(3)
4631     .TestQU8();
4632 }
4633 
4634 /**************************** CONV path ****************************/
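// Unit-stride 3x3 cases for the CONV path named above: with stride 1 the deconvolution reduces to a padded
// convolution. Kx3/3xK sweep one kernel dimension; the remaining tests vary padding, output adjustment,
// dilation, strides, and channel counts.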
4635 
4636 TEST(DECONVOLUTION_NHWC_QU8, 3x3) {
4637   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4638   DeconvolutionOperatorTester()
4639     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4640     .padding(1)
4641     .kernel_size(3, 3)
4642     .group_input_channels(15)
4643     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4644     .iterations(3)
4645     .TestQU8();
4646 }
4647 
4648 TEST(DECONVOLUTION_NHWC_QU8, Kx3) {
4649   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4650   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
4651     DeconvolutionOperatorTester()
4652       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4653       .padding_width(1)
4654       .kernel_size(kernel_height, 3)
4655       .group_input_channels(17)
4656       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4657       .iterations(3)
4658       .TestQU8();
4659   }
4660 }
4661 
4662 TEST(DECONVOLUTION_NHWC_QU8, 3xK) {
4663   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4664   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
4665     DeconvolutionOperatorTester()
4666       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4667       .padding_height(1)
4668       .kernel_size(3, kernel_width)
4669       .group_input_channels(17)
4670       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4671       .iterations(3)
4672       .TestQU8();
4673   }
4674 }
4675 
4676 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_height_padding) {
4677   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4678   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
4679     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
4680       DeconvolutionOperatorTester()
4681         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4682         .padding_width(1)
4683         .padding_top(padding_top)
4684         .padding_bottom(padding_bottom)
4685         .kernel_size(3, 3)
4686         .group_input_channels(15)
4687         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4688         .iterations(1)
4689         .TestQU8();
4690     }
4691   }
4692 }
4693 
4694 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_width_padding) {
4695   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4696   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
4697     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
4698       DeconvolutionOperatorTester()
4699         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4700         .padding_height(1)
4701         .padding_left(padding_left)
4702         .padding_right(padding_right)
4703         .kernel_size(3, 3)
4704         .group_input_channels(15)
4705         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4706         .iterations(1)
4707         .TestQU8();
4708     }
4709   }
4710 }
4711 
4712 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_height_adjustment) {
4713   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4714   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
4715     DeconvolutionOperatorTester()
4716       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4717       .padding(1)
4718       .stride_height(adjustment_height + 1)
4719       .adjustment_height(adjustment_height)
4720       .kernel_size(3, 3)
4721       .group_input_channels(15)
4722       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4723       .iterations(1)
4724       .TestQU8();
4725   }
4726 }
4727 
4728 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_width_adjustment) {
4729   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4730   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
4731     DeconvolutionOperatorTester()
4732       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4733       .padding(1)
4734       .stride_width(adjustment_width + 1)
4735       .adjustment_width(adjustment_width)
4736       .kernel_size(3, 3)
4737       .group_input_channels(15)
4738       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4739       .iterations(1)
4740       .TestQU8();
4741   }
4742 }
4743 
4744 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_input_height) {
4745   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4746   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
4747     DeconvolutionOperatorTester()
4748       .input_size(input_height, kUnstridedInputWidth)
4749       .padding(1)
4750       .kernel_size(3, 3)
4751       .group_input_channels(15)
4752       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4753       .iterations(1)
4754       .TestQU8();
4755   }
4756 }
4757 
4758 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_input_width) {
4759   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4760   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
4761     DeconvolutionOperatorTester()
4762       .input_size(kUnstridedInputHeight, input_width)
4763       .padding(1)
4764       .kernel_size(3, 3)
4765       .group_input_channels(15)
4766       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4767       .iterations(1)
4768       .TestQU8();
4769   }
4770 }
4771 
4772 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_input_channels) {
4773   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4774   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
4775     DeconvolutionOperatorTester()
4776       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4777       .padding(1)
4778       .kernel_size(3, 3)
4779       .group_input_channels(input_channels)
4780       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4781       .iterations(1)
4782       .TestQU8();
4783   }
4784 }
4785 
4786 TEST(DECONVOLUTION_NHWC_QU8, 3x3_varying_output_channels) {
4787   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4788   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
4789     DeconvolutionOperatorTester()
4790       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4791       .padding(1)
4792       .kernel_size(3, 3)
4793       .group_input_channels(23)
4794       .group_output_channels(output_channels)
4795       .iterations(1)
4796       .TestQU8();
4797   }
4798 }
4799 
4800 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_height_dilation) {
4801   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4802   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
4803     DeconvolutionOperatorTester()
4804       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4805       .padding(1)
4806       .kernel_size(3, 3)
4807       .dilation_height(dilation_height)
4808       .group_input_channels(23)
4809       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4810       .iterations(3)
4811       .TestQU8();
4812   }
4813 }
4814 
4815 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_width_dilation) {
4816   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4817   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
4818     DeconvolutionOperatorTester()
4819       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4820       .padding(1)
4821       .kernel_size(3, 3)
4822       .dilation_width(dilation_width)
4823       .group_input_channels(23)
4824       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4825       .iterations(3)
4826       .TestQU8();
4827   }
4828 }
4829 
4830 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_height_dilation_and_stride) {
4831   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4832   DeconvolutionOperatorTester()
4833     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4834     .padding(1)
4835     .kernel_size(3, 3)
4836     .dilation_height(3)
4837     .stride_height(2)
4838     .group_input_channels(23)
4839     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4840     .iterations(3)
4841     .TestQU8();
4842 }
4843 
4844 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_width_dilation_and_stride) {
4845   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4846   DeconvolutionOperatorTester()
4847     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4848     .padding(1)
4849     .kernel_size(3, 3)
4850     .dilation_width(3)
4851     .stride_width(2)
4852     .group_input_channels(23)
4853     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4854     .iterations(3)
4855     .TestQU8();
4856 }
4857 
4858 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_input_stride) {
4859   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4860   DeconvolutionOperatorTester()
4861     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4862     .padding(1)
4863     .kernel_size(3, 3)
4864     .group_input_channels(23)
4865     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4866     .input_pixel_stride(28)
4867     .iterations(3)
4868     .TestQU8();
4869 }
4870 
4871 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_output_stride) {
4872   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4873   DeconvolutionOperatorTester()
4874     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4875     .padding(1)
4876     .kernel_size(3, 3)
4877     .group_input_channels(23)
4878     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4879     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
4880     .iterations(3)
4881     .TestQU8();
4882 }
4883 
4884 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_qmin) {
4885   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4886   DeconvolutionOperatorTester()
4887     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4888     .padding(1)
4889     .kernel_size(3, 3)
4890     .group_input_channels(23)
4891     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4892     .qmin(128)
4893     .iterations(3)
4894     .TestQU8();
4895 }
4896 
4897 TEST(DECONVOLUTION_NHWC_QU8, 3x3_with_qmax) {
4898   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4899   DeconvolutionOperatorTester()
4900     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4901     .padding(1)
4902     .kernel_size(3, 3)
4903     .group_input_channels(23)
4904     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4905     .qmax(128)
4906     .iterations(3)
4907     .TestQU8();
4908 }
4909 
4910 TEST(DECONVOLUTION_NHWC_QU8, 3x3_without_bias) {
4911   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4912   DeconvolutionOperatorTester()
4913     .has_bias(false)
4914     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4915     .padding(1)
4916     .kernel_size(3, 3)
4917     .group_input_channels(23)
4918     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4919     .iterations(3)
4920     .TestQU8();
4921 }
4922 
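// The weights_cache variants presumably rerun the same case with an XNNPACK weights cache attached,
// so packed weights can be shared across operator instances.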
4923 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_3x3) {
4924   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4925   DeconvolutionOperatorTester()
4926     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4927     .padding(1)
4928     .kernel_size(3, 3)
4929     .group_input_channels(15)
4930     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4931     .use_weights_cache(true)
4932     .iterations(3)
4933     .TestQU8();
4934 }
4935 
4936 /**************************** CONV path, grouped ****************************/
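// The same 3x3 cases with groups(2).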
4937 
4938 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3) {
4939   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4940   DeconvolutionOperatorTester()
4941     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4942     .padding(1)
4943     .kernel_size(3, 3)
4944     .groups(2)
4945     .group_input_channels(15)
4946     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4947     .iterations(3)
4948     .TestQU8();
4949 }
4950 
4951 TEST(DECONVOLUTION_NHWC_QU8, grouped_Kx3) {
4952   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4953   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
4954     DeconvolutionOperatorTester()
4955       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4956       .padding_width(1)
4957       .kernel_size(kernel_height, 3)
4958       .groups(2)
4959       .group_input_channels(17)
4960       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4961       .iterations(3)
4962       .TestQU8();
4963   }
4964 }
4965 
4966 TEST(DECONVOLUTION_NHWC_QU8, grouped_3xK) {
4967   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4968   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
4969     DeconvolutionOperatorTester()
4970       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4971       .padding_height(1)
4972       .kernel_size(3, kernel_width)
4973       .groups(2)
4974       .group_input_channels(17)
4975       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4976       .iterations(3)
4977       .TestQU8();
4978   }
4979 }
4980 
4981 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_height_padding) {
4982   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
4983   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
4984     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
4985       DeconvolutionOperatorTester()
4986         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
4987         .padding_width(1)
4988         .padding_top(padding_top)
4989         .padding_bottom(padding_bottom)
4990         .kernel_size(3, 3)
4991         .groups(2)
4992         .group_input_channels(15)
4993         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
4994         .iterations(1)
4995         .TestQU8();
4996     }
4997   }
4998 }
4999 
5000 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_width_padding) {
5001   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5002   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
5003     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
5004       DeconvolutionOperatorTester()
5005         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5006         .padding_height(1)
5007         .padding_left(padding_left)
5008         .padding_right(padding_right)
5009         .kernel_size(3, 3)
5010         .groups(2)
5011         .group_input_channels(15)
5012         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5013         .iterations(1)
5014         .TestQU8();
5015     }
5016   }
5017 }
5018 
5019 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_height_adjustment) {
5020   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5021   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
5022     DeconvolutionOperatorTester()
5023       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5024       .padding(1)
5025       .stride_height(adjustment_height + 1)
5026       .adjustment_height(adjustment_height)
5027       .kernel_size(3, 3)
5028       .groups(2)
5029       .group_input_channels(15)
5030       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5031       .iterations(1)
5032       .TestQU8();
5033   }
5034 }
5035 
5036 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_width_adjustment) {
5037   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5038   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
5039     DeconvolutionOperatorTester()
5040       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5041       .padding(1)
5042       .stride_width(adjustment_width + 1)
5043       .adjustment_width(adjustment_width)
5044       .kernel_size(3, 3)
5045       .groups(2)
5046       .group_input_channels(15)
5047       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5048       .iterations(1)
5049       .TestQU8();
5050   }
5051 }
5052 
5053 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_input_height) {
5054   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5055   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
5056     DeconvolutionOperatorTester()
5057       .input_size(input_height, kUnstridedInputWidth)
5058       .padding(1)
5059       .kernel_size(3, 3)
5060       .groups(2)
5061       .group_input_channels(15)
5062       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5063       .iterations(1)
5064       .TestQU8();
5065   }
5066 }
5067 
5068 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_input_width) {
5069   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5070   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
5071     DeconvolutionOperatorTester()
5072       .input_size(kUnstridedInputHeight, input_width)
5073       .padding(1)
5074       .kernel_size(3, 3)
5075       .groups(2)
5076       .group_input_channels(15)
5077       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5078       .iterations(1)
5079       .TestQU8();
5080   }
5081 }
5082 
5083 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_input_channels) {
5084   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5085   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
5086     DeconvolutionOperatorTester()
5087       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5088       .padding(1)
5089       .kernel_size(3, 3)
5090       .groups(2)
5091       .group_input_channels(input_channels)
5092       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5093       .iterations(1)
5094       .TestQU8();
5095   }
5096 }
5097 
5098 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_varying_output_channels) {
5099   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5100   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
5101     DeconvolutionOperatorTester()
5102       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5103       .padding(1)
5104       .kernel_size(3, 3)
5105       .groups(2)
5106       .group_input_channels(23)
5107       .group_output_channels(output_channels)
5108       .iterations(1)
5109       .TestQU8();
5110   }
5111 }
5112 
5113 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_height_dilation) {
5114   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5115   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
5116     DeconvolutionOperatorTester()
5117       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5118       .padding(1)
5119       .kernel_size(3, 3)
5120       .dilation_height(dilation_height)
5121       .groups(2)
5122       .group_input_channels(23)
5123       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5124       .iterations(3)
5125       .TestQU8();
5126   }
5127 }
5128 
5129 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_width_dilation) {
5130   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5131   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
5132     DeconvolutionOperatorTester()
5133       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5134       .padding(1)
5135       .kernel_size(3, 3)
5136       .dilation_width(dilation_width)
5137       .groups(2)
5138       .group_input_channels(23)
5139       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5140       .iterations(3)
5141       .TestQU8();
5142   }
5143 }
5144 
5145 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_height_dilation_and_stride) {
5146   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5147   DeconvolutionOperatorTester()
5148     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5149     .padding(1)
5150     .kernel_size(3, 3)
5151     .dilation_height(3)
5152     .stride_height(2)
5153     .groups(2)
5154     .group_input_channels(23)
5155     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5156     .iterations(3)
5157     .TestQU8();
5158 }
5159 
5160 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_width_dilation_and_stride) {
5161   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5162   DeconvolutionOperatorTester()
5163     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5164     .padding(1)
5165     .kernel_size(3, 3)
5166     .dilation_width(3)
5167     .stride_width(2)
5168     .groups(2)
5169     .group_input_channels(23)
5170     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5171     .iterations(3)
5172     .TestQU8();
5173 }
5174 
5175 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_input_stride) {
5176   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5177   DeconvolutionOperatorTester()
5178     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5179     .padding(1)
5180     .kernel_size(3, 3)
5181     .groups(2)
5182     .group_input_channels(23)
5183     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5184     .input_pixel_stride(47)
5185     .iterations(3)
5186     .TestQU8();
5187 }
5188 
5189 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_output_stride) {
5190   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5191   DeconvolutionOperatorTester()
5192     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5193     .padding(1)
5194     .kernel_size(3, 3)
5195     .groups(2)
5196     .group_input_channels(23)
5197     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
5198     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
5199     .iterations(3)
5200     .TestQU8();
5201 }
5202 
5203 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_qmin) {
5204   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5205   DeconvolutionOperatorTester()
5206     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5207     .padding(1)
5208     .kernel_size(3, 3)
5209     .groups(2)
5210     .group_input_channels(23)
5211     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5212     .qmin(128)
5213     .iterations(3)
5214     .TestQU8();
5215 }
5216 
5217 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_with_qmax) {
5218   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5219   DeconvolutionOperatorTester()
5220     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5221     .padding(1)
5222     .kernel_size(3, 3)
5223     .groups(2)
5224     .group_input_channels(23)
5225     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5226     .qmax(128)
5227     .iterations(3)
5228     .TestQU8();
5229 }
5230 
5231 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3_without_bias) {
5232   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5233   DeconvolutionOperatorTester()
5234     .has_bias(false)
5235     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5236     .padding(1)
5237     .kernel_size(3, 3)
5238     .groups(2)
5239     .group_input_channels(23)
5240     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5241     .iterations(3)
5242     .TestQU8();
5243 }
5244 
5245 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_grouped_3x3) {
5246   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5247   DeconvolutionOperatorTester()
5248     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5249     .padding(1)
5250     .kernel_size(3, 3)
5251     .groups(2)
5252     .group_input_channels(15)
5253     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5254     .use_weights_cache(true)
5255     .iterations(3)
5256     .TestQU8();
5257 }
5258 
5259 /**************************** CONV path, batched ****************************/
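// The same 3x3 cases with batch_size(2).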
5260 
5261 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3) {
5262   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5263   DeconvolutionOperatorTester()
5264     .batch_size(2)
5265     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5266     .padding(1)
5267     .kernel_size(3, 3)
5268     .group_input_channels(15)
5269     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5270     .iterations(3)
5271     .TestQU8();
5272 }
5273 
5274 TEST(DECONVOLUTION_NHWC_QU8, batched_Kx3) {
5275   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5276   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
5277     DeconvolutionOperatorTester()
5278       .batch_size(2)
5279       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5280       .padding_width(1)
5281       .kernel_size(kernel_height, 3)
5282       .group_input_channels(17)
5283       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5284       .iterations(3)
5285       .TestQU8();
5286   }
5287 }
5288 
5289 TEST(DECONVOLUTION_NHWC_QU8, batched_3xK) {
5290   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5291   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
5292     DeconvolutionOperatorTester()
5293       .batch_size(2)
5294       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5295       .padding_height(1)
5296       .kernel_size(3, kernel_width)
5297       .group_input_channels(17)
5298       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5299       .iterations(3)
5300       .TestQU8();
5301   }
5302 }
5303 
5304 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_height_padding) {
5305   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5306   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
5307     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
5308       DeconvolutionOperatorTester()
5309         .batch_size(2)
5310         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5311         .padding_width(1)
5312         .padding_top(padding_top)
5313         .padding_bottom(padding_bottom)
5314         .kernel_size(3, 3)
5315         .group_input_channels(15)
5316         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5317         .iterations(1)
5318         .TestQU8();
5319     }
5320   }
5321 }
5322 
5323 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_width_padding) {
5324   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5325   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
5326     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
5327       DeconvolutionOperatorTester()
5328         .batch_size(2)
5329         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5330         .padding_height(1)
5331         .padding_left(padding_left)
5332         .padding_right(padding_right)
5333         .kernel_size(3, 3)
5334         .group_input_channels(15)
5335         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5336         .iterations(1)
5337         .TestQU8();
5338     }
5339   }
5340 }
5341 
5342 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_height_adjustment) {
5343   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5344   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
5345     DeconvolutionOperatorTester()
5346       .batch_size(2)
5347       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5348       .padding(1)
5349       .stride_height(adjustment_height + 1)
5350       .adjustment_height(adjustment_height)
5351       .kernel_size(3, 3)
5352       .group_input_channels(15)
5353       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5354       .iterations(1)
5355       .TestQU8();
5356   }
5357 }
5358 
5359 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_width_adjustment) {
5360   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5361   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
5362     DeconvolutionOperatorTester()
5363       .batch_size(2)
5364       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5365       .padding(1)
5366       .stride_width(adjustment_width + 1)
5367       .adjustment_width(adjustment_width)
5368       .kernel_size(3, 3)
5369       .group_input_channels(15)
5370       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5371       .iterations(1)
5372       .TestQU8();
5373   }
5374 }
5375 
5376 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_input_height) {
5377   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5378   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
5379     DeconvolutionOperatorTester()
5380       .batch_size(2)
5381       .input_size(input_height, kUnstridedInputWidth)
5382       .padding(1)
5383       .kernel_size(3, 3)
5384       .group_input_channels(15)
5385       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5386       .iterations(1)
5387       .TestQU8();
5388   }
5389 }
5390 
5391 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_input_width) {
5392   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5393   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
5394     DeconvolutionOperatorTester()
5395       .batch_size(2)
5396       .input_size(kUnstridedInputHeight, input_width)
5397       .padding(1)
5398       .kernel_size(3, 3)
5399       .group_input_channels(15)
5400       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5401       .iterations(1)
5402       .TestQU8();
5403   }
5404 }
5405 
5406 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_input_channels) {
5407   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5408   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
5409     DeconvolutionOperatorTester()
5410       .batch_size(2)
5411       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5412       .padding(1)
5413       .kernel_size(3, 3)
5414       .group_input_channels(input_channels)
5415       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5416       .iterations(1)
5417       .TestQU8();
5418   }
5419 }
5420 
5421 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_varying_output_channels) {
5422   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5423   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
5424     DeconvolutionOperatorTester()
5425       .batch_size(2)
5426       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5427       .padding(1)
5428       .kernel_size(3, 3)
5429       .group_input_channels(23)
5430       .group_output_channels(output_channels)
5431       .iterations(1)
5432       .TestQU8();
5433   }
5434 }
5435 
5436 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_height_dilation) {
5437   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5438   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
5439     DeconvolutionOperatorTester()
5440       .batch_size(2)
5441       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5442       .padding(1)
5443       .kernel_size(3, 3)
5444       .dilation_height(dilation_height)
5445       .group_input_channels(23)
5446       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5447       .iterations(3)
5448       .TestQU8();
5449   }
5450 }
5451 
5452 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_width_dilation) {
5453   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5454   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
5455     DeconvolutionOperatorTester()
5456       .batch_size(2)
5457       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5458       .padding(1)
5459       .kernel_size(3, 3)
5460       .dilation_width(dilation_width)
5461       .group_input_channels(23)
5462       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5463       .iterations(3)
5464       .TestQU8();
5465   }
5466 }
5467 
5468 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_height_dilation_and_stride) {
5469   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5470   DeconvolutionOperatorTester()
5471     .batch_size(2)
5472     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5473     .padding(1)
5474     .kernel_size(3, 3)
5475     .dilation_height(3)
5476     .stride_height(2)
5477     .group_input_channels(23)
5478     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5479     .iterations(3)
5480     .TestQU8();
5481 }
5482 
5483 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_width_dilation_and_stride) {
5484   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5485   DeconvolutionOperatorTester()
5486     .batch_size(2)
5487     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5488     .padding(1)
5489     .kernel_size(3, 3)
5490     .dilation_width(3)
5491     .stride_width(2)
5492     .group_input_channels(23)
5493     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5494     .iterations(3)
5495     .TestQU8();
5496 }
5497 
5498 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_input_stride) {
5499   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5500   DeconvolutionOperatorTester()
5501     .batch_size(2)
5502     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5503     .padding(1)
5504     .kernel_size(3, 3)
5505     .group_input_channels(23)
5506     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5507     .input_pixel_stride(28)
5508     .iterations(3)
5509     .TestQU8();
5510 }
5511 
5512 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_output_stride) {
5513   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5514   DeconvolutionOperatorTester()
5515     .batch_size(2)
5516     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5517     .padding(1)
5518     .kernel_size(3, 3)
5519     .group_input_channels(23)
5520     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5521     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
5522     .iterations(3)
5523     .TestQU8();
5524 }
5525 
5526 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_qmin) {
5527   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5528   DeconvolutionOperatorTester()
5529     .batch_size(2)
5530     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5531     .padding(1)
5532     .kernel_size(3, 3)
5533     .group_input_channels(23)
5534     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5535     .qmin(128)
5536     .iterations(3)
5537     .TestQU8();
5538 }
5539 
5540 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_with_qmax) {
5541   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5542   DeconvolutionOperatorTester()
5543     .batch_size(2)
5544     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5545     .padding(1)
5546     .kernel_size(3, 3)
5547     .group_input_channels(23)
5548     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5549     .qmax(128)
5550     .iterations(3)
5551     .TestQU8();
5552 }
5553 
5554 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3_without_bias) {
5555   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5556   DeconvolutionOperatorTester()
5557     .has_bias(false)
5558     .batch_size(2)
5559     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5560     .padding(1)
5561     .kernel_size(3, 3)
5562     .group_input_channels(23)
5563     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5564     .iterations(3)
5565     .TestQU8();
5566 }
5567 
5568 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_3x3) {
5569   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5570   DeconvolutionOperatorTester()
5571     .batch_size(2)
5572     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5573     .padding(1)
5574     .kernel_size(3, 3)
5575     .group_input_channels(15)
5576     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5577     .use_weights_cache(true)
5578     .iterations(3)
5579     .TestQU8();
5580 }
5581 
5582 /**************************** CONV path, grouped, batched ****************************/
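// The batched, grouped variants below combine batch_size(2) and groups(2) on the
// unstrided 3x3 CONV path, sweeping padding, output-size adjustment, input size,
// channel counts, dilation, pixel strides, qmin/qmax, bias, and the weights cache.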
5583 
5584 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3) {
5585   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5586   DeconvolutionOperatorTester()
5587     .batch_size(2)
5588     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5589     .padding(1)
5590     .kernel_size(3, 3)
5591     .groups(2)
5592     .group_input_channels(15)
5593     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5594     .iterations(3)
5595     .TestQU8();
5596 }
5597 
5598 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_Kx3) {
5599   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5600   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
5601     DeconvolutionOperatorTester()
5602       .batch_size(2)
5603       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5604       .padding_width(1)
5605       .kernel_size(kernel_height, 3)
5606       .groups(2)
5607       .group_input_channels(17)
5608       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5609       .iterations(3)
5610       .TestQU8();
5611   }
5612 }
5613 
5614 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3xK) {
5615   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5616   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
5617     DeconvolutionOperatorTester()
5618       .batch_size(2)
5619       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5620       .padding_height(1)
5621       .kernel_size(3, kernel_width)
5622       .groups(2)
5623       .group_input_channels(17)
5624       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5625       .iterations(3)
5626       .TestQU8();
5627   }
5628 }
5629 
5630 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_height_padding) {
5631   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5632   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
5633     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
5634       DeconvolutionOperatorTester()
5635         .batch_size(2)
5636         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5637         .padding_width(1)
5638         .padding_top(padding_top)
5639         .padding_bottom(padding_bottom)
5640         .kernel_size(3, 3)
5641         .groups(2)
5642         .group_input_channels(15)
5643         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5644         .iterations(1)
5645         .TestQU8();
5646     }
5647   }
5648 }
5649 
5650 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_width_padding) {
5651   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5652   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
5653     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
5654       DeconvolutionOperatorTester()
5655         .batch_size(2)
5656         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5657         .padding_height(1)
5658         .padding_left(padding_left)
5659         .padding_right(padding_right)
5660         .kernel_size(3, 3)
5661         .groups(2)
5662         .group_input_channels(15)
5663         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5664         .iterations(1)
5665         .TestQU8();
5666     }
5667   }
5668 }
5669 
5670 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_height_adjustment) {
5671   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5672   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
5673     DeconvolutionOperatorTester()
5674       .batch_size(2)
5675       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5676       .padding(1)
5677       .stride_height(adjustment_height + 1)
5678       .adjustment_height(adjustment_height)
5679       .kernel_size(3, 3)
5680       .groups(2)
5681       .group_input_channels(15)
5682       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5683       .iterations(1)
5684       .TestQU8();
5685   }
5686 }
5687 
5688 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_width_adjustment) {
5689   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5690   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
5691     DeconvolutionOperatorTester()
5692       .batch_size(2)
5693       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5694       .padding(1)
5695       .stride_width(adjustment_width + 1)
5696       .adjustment_width(adjustment_width)
5697       .kernel_size(3, 3)
5698       .groups(2)
5699       .group_input_channels(15)
5700       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5701       .iterations(1)
5702       .TestQU8();
5703   }
5704 }
5705 
5706 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_input_height) {
5707   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5708   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
5709     DeconvolutionOperatorTester()
5710       .batch_size(2)
5711       .input_size(input_height, kUnstridedInputWidth)
5712       .padding(1)
5713       .kernel_size(3, 3)
5714       .groups(2)
5715       .group_input_channels(15)
5716       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5717       .iterations(1)
5718       .TestQU8();
5719   }
5720 }
5721 
5722 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_input_width) {
5723   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5724   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
5725     DeconvolutionOperatorTester()
5726       .batch_size(2)
5727       .input_size(kUnstridedInputHeight, input_width)
5728       .padding(1)
5729       .kernel_size(3, 3)
5730       .groups(2)
5731       .group_input_channels(15)
5732       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5733       .iterations(1)
5734       .TestQU8();
5735   }
5736 }
5737 
5738 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_input_channels) {
5739   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5740   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
5741     DeconvolutionOperatorTester()
5742       .batch_size(2)
5743       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5744       .padding(1)
5745       .kernel_size(3, 3)
5746       .groups(2)
5747       .group_input_channels(input_channels)
5748       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5749       .iterations(1)
5750       .TestQU8();
5751   }
5752 }
5753 
5754 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_varying_output_channels) {
5755   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5756   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
5757     DeconvolutionOperatorTester()
5758       .batch_size(2)
5759       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5760       .padding(1)
5761       .kernel_size(3, 3)
5762       .groups(2)
5763       .group_input_channels(23)
5764       .group_output_channels(output_channels)
5765       .iterations(1)
5766       .TestQU8();
5767   }
5768 }
5769 
5770 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_height_dilation) {
5771   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5772   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
5773     DeconvolutionOperatorTester()
5774       .batch_size(2)
5775       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5776       .padding(1)
5777       .kernel_size(3, 3)
5778       .dilation_height(dilation_height)
5779       .groups(2)
5780       .group_input_channels(23)
5781       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5782       .iterations(3)
5783       .TestQU8();
5784   }
5785 }
5786 
5787 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_width_dilation) {
5788   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5789   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
5790     DeconvolutionOperatorTester()
5791       .batch_size(2)
5792       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5793       .padding(1)
5794       .kernel_size(3, 3)
5795       .dilation_width(dilation_width)
5796       .groups(2)
5797       .group_input_channels(23)
5798       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5799       .iterations(3)
5800       .TestQU8();
5801   }
5802 }
5803 
5804 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_height_dilation_and_stride) {
5805   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5806   DeconvolutionOperatorTester()
5807     .batch_size(2)
5808     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5809     .padding(1)
5810     .kernel_size(3, 3)
5811     .dilation_height(3)
5812     .stride_height(2)
5813     .groups(2)
5814     .group_input_channels(23)
5815     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5816     .iterations(3)
5817     .TestQU8();
5818 }
5819 
5820 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_width_dilation_and_stride) {
5821   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5822   DeconvolutionOperatorTester()
5823     .batch_size(2)
5824     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5825     .padding(1)
5826     .kernel_size(3, 3)
5827     .dilation_width(3)
5828     .stride_width(2)
5829     .groups(2)
5830     .group_input_channels(23)
5831     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5832     .iterations(3)
5833     .TestQU8();
5834 }
5835 
5836 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_input_stride) {
5837   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5838   DeconvolutionOperatorTester()
5839     .batch_size(2)
5840     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5841     .padding(1)
5842     .kernel_size(3, 3)
5843     .groups(2)
5844     .group_input_channels(23)
5845     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5846     .input_pixel_stride(47)
5847     .iterations(3)
5848     .TestQU8();
5849 }
5850 
5851 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_output_stride) {
5852   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5853   DeconvolutionOperatorTester()
5854     .batch_size(2)
5855     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5856     .padding(1)
5857     .kernel_size(3, 3)
5858     .groups(2)
5859     .group_input_channels(23)
5860     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
5861     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
5862     .iterations(3)
5863     .TestQU8();
5864 }
5865 
5866 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_qmin) {
5867   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5868   DeconvolutionOperatorTester()
5869     .batch_size(2)
5870     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5871     .padding(1)
5872     .kernel_size(3, 3)
5873     .groups(2)
5874     .group_input_channels(23)
5875     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5876     .qmin(128)
5877     .iterations(3)
5878     .TestQU8();
5879 }
5880 
5881 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_with_qmax) {
5882   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5883   DeconvolutionOperatorTester()
5884     .batch_size(2)
5885     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5886     .padding(1)
5887     .kernel_size(3, 3)
5888     .groups(2)
5889     .group_input_channels(23)
5890     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5891     .qmax(128)
5892     .iterations(3)
5893     .TestQU8();
5894 }
5895 
5896 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3_without_bias) {
5897   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5898   DeconvolutionOperatorTester()
5899     .has_bias(false)
5900     .batch_size(2)
5901     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5902     .padding(1)
5903     .kernel_size(3, 3)
5904     .groups(2)
5905     .group_input_channels(23)
5906     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5907     .iterations(3)
5908     .TestQU8();
5909 }
5910 
5911 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_grouped_3x3) {
5912   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5913   DeconvolutionOperatorTester()
5914     .batch_size(2)
5915     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5916     .padding(1)
5917     .kernel_size(3, 3)
5918     .groups(2)
5919     .group_input_channels(15)
5920     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5921     .use_weights_cache(true)
5922     .iterations(3)
5923     .TestQU8();
5924 }
5925 
5926 /**************************** CONV path, setup ****************************/
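// The setup tests below build one operator and then re-run it through
// TestSetupQU8() with a different batch size, input height, or input width,
// checking that setup after a shape change still produces correct results.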
5927 
5928 TEST(DECONVOLUTION_NHWC_QU8, 3x3_setup_changing_batch) {
5929   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5930   DeconvolutionOperatorTester()
5931     .batch_size(2)
5932     .next_batch_size(5)
5933     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5934     .kernel_height(3)
5935     .kernel_width(5)
5936     .groups(2)
5937     .group_input_channels(15)
5938     .group_output_channels(17)
5939     .TestSetupQU8();
5940 }
5941 
5942 TEST(DECONVOLUTION_NHWC_QU8, 3x3_setup_changing_height) {
5943   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5944   DeconvolutionOperatorTester()
5945     .batch_size(2)
5946     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5947     .next_input_height(kUnstridedInputHeight + 3)
5948     .kernel_height(3)
5949     .kernel_width(5)
5950     .groups(2)
5951     .group_input_channels(15)
5952     .group_output_channels(17)
5953     .TestSetupQU8();
5954 }
5955 
5956 TEST(DECONVOLUTION_NHWC_QU8, 3x3_setup_changing_width) {
5957   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5958   DeconvolutionOperatorTester()
5959     .batch_size(2)
5960     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
5961     .next_input_width(kUnstridedInputWidth + 3)
5962     .kernel_height(3)
5963     .kernel_width(5)
5964     .groups(2)
5965     .group_input_channels(15)
5966     .group_output_channels(17)
5967     .TestSetupQU8();
5968 }
5969 
5970 /**************************** SUBCONV2D/IGEMM path ****************************/
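// With stride(2) and no dilation, these shapes are expected to take the
// SUBCONV2D/IGEMM route: the kernel is (roughly) split into one sub-kernel per
// output phase, and the output size per spatial dimension should follow
//   output = stride * (input - 1) + adjustment + (kernel - 1) * dilation + 1 - padding.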
5971 
5972 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2) {
5973   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5974   DeconvolutionOperatorTester()
5975     .input_size(kStridedInputHeight, kStridedInputWidth)
5976     .padding(1)
5977     .kernel_size(3, 3)
5978     .stride(2)
5979     .group_input_channels(15)
5980     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5981     .iterations(3)
5982     .TestQU8();
5983 }
5984 
5985 TEST(DECONVOLUTION_NHWC_QU8, Kx3s2) {
5986   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
5987   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
5988     DeconvolutionOperatorTester()
5989       .input_size(kStridedInputHeight, kStridedInputWidth)
5990       .padding_width(1)
5991       .kernel_size(kernel_height, 3)
5992       .stride(2)
5993       .group_input_channels(17)
5994       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
5995       .iterations(3)
5996       .TestQU8();
5997   }
5998 }
5999 
6000 TEST(DECONVOLUTION_NHWC_QU8, 3xKs2) {
6001   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6002   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6003     DeconvolutionOperatorTester()
6004       .input_size(kStridedInputHeight, kStridedInputWidth)
6005       .padding_height(1)
6006       .kernel_size(3, kernel_width)
6007       .stride(2)
6008       .group_input_channels(17)
6009       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6010       .iterations(3)
6011       .TestQU8();
6012   }
6013 }
6014 
6015 TEST(DECONVOLUTION_NHWC_QU8, 3x3sSx1) {
6016   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6017   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6018     DeconvolutionOperatorTester()
6019       .input_size(kStridedInputHeight, kStridedInputWidth)
6020       .padding(1)
6021       .padding_width(1)
6022       .kernel_size(3, 3)
6023       .stride_height(stride_height)
6024       .group_input_channels(17)
6025       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6026       .iterations(3)
6027       .TestQU8();
6028   }
6029 }
6030 
6031 TEST(DECONVOLUTION_NHWC_QU8, 3x3s1xS) {
6032   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6033   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6034     DeconvolutionOperatorTester()
6035       .input_size(kStridedInputHeight, kStridedInputWidth)
6036       .padding(1)
6037       .padding_width(1)
6038       .kernel_size(3, 3)
6039       .stride_width(stride_width)
6040       .group_input_channels(17)
6041       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6042       .iterations(3)
6043       .TestQU8();
6044   }
6045 }
6046 
6047 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_height_padding) {
6048   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6049   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6050     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6051       DeconvolutionOperatorTester()
6052         .input_size(kStridedInputHeight, kStridedInputWidth)
6053         .padding_width(1)
6054         .padding_top(padding_top)
6055         .padding_bottom(padding_bottom)
6056         .kernel_size(3, 3)
6057         .stride(2)
6058         .group_input_channels(15)
6059         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6060         .iterations(1)
6061         .TestQU8();
6062     }
6063   }
6064 }
6065 
6066 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_width_padding) {
6067   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6068   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6069     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6070       DeconvolutionOperatorTester()
6071         .input_size(kStridedInputHeight, kStridedInputWidth)
6072         .padding_height(1)
6073         .padding_left(padding_left)
6074         .padding_right(padding_right)
6075         .kernel_size(3, 3)
6076         .stride(2)
6077         .group_input_channels(15)
6078         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6079         .iterations(1)
6080         .TestQU8();
6081     }
6082   }
6083 }
6084 
6085 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_height_adjustment) {
6086   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6087   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
6088     DeconvolutionOperatorTester()
6089       .input_size(kStridedInputHeight, kStridedInputWidth)
6090       .padding(1)
6091       .adjustment_height(adjustment_height)
6092       .kernel_size(3, 3)
6093       .stride(2)
6094       .group_input_channels(15)
6095       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6096       .iterations(1)
6097       .TestQU8();
6098   }
6099 }
6100 
6101 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_width_adjustment) {
6102   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6103   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
6104     DeconvolutionOperatorTester()
6105       .input_size(kStridedInputHeight, kStridedInputWidth)
6106       .padding(1)
6107       .adjustment_width(adjustment_width)
6108       .kernel_size(3, 3)
6109       .stride(2)
6110       .group_input_channels(15)
6111       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6112       .iterations(1)
6113       .TestQU8();
6114   }
6115 }
6116 
6117 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_input_height) {
6118   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6119   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
6120     DeconvolutionOperatorTester()
6121       .input_size(input_height, kStridedInputWidth)
6122       .padding(1)
6123       .kernel_size(3, 3)
6124       .stride(2)
6125       .group_input_channels(15)
6126       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6127       .iterations(1)
6128       .TestQU8();
6129   }
6130 }
6131 
6132 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_input_width) {
6133   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6134   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
6135     DeconvolutionOperatorTester()
6136       .input_size(kStridedInputHeight, input_width)
6137       .padding(1)
6138       .kernel_size(3, 3)
6139       .stride(2)
6140       .group_input_channels(15)
6141       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6142       .iterations(1)
6143       .TestQU8();
6144   }
6145 }
6146 
6147 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_input_channels) {
6148   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6149   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
6150     DeconvolutionOperatorTester()
6151       .input_size(kStridedInputHeight, kStridedInputWidth)
6152       .padding(1)
6153       .kernel_size(3, 3)
6154       .stride(2)
6155       .group_input_channels(input_channels)
6156       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6157       .iterations(1)
6158       .TestQU8();
6159   }
6160 }
6161 
6162 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_varying_output_channels) {
6163   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6164   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
6165     DeconvolutionOperatorTester()
6166       .input_size(kStridedInputHeight, kStridedInputWidth)
6167       .padding(1)
6168       .kernel_size(3, 3)
6169       .stride(2)
6170       .group_input_channels(23)
6171       .group_output_channels(output_channels)
6172       .iterations(1)
6173       .TestQU8();
6174   }
6175 }
6176 
6177 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_input_stride) {
6178   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6179   DeconvolutionOperatorTester()
6180     .input_size(kStridedInputHeight, kStridedInputWidth)
6181     .padding(1)
6182     .kernel_size(3, 3)
6183     .stride(2)
6184     .group_input_channels(23)
6185     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6186     .input_pixel_stride(28)
6187     .iterations(3)
6188     .TestQU8();
6189 }
6190 
6191 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_output_stride) {
6192   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6193   DeconvolutionOperatorTester()
6194     .input_size(kStridedInputHeight, kStridedInputWidth)
6195     .padding(1)
6196     .kernel_size(3, 3)
6197     .stride(2)
6198     .group_input_channels(23)
6199     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6200     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
6201     .iterations(3)
6202     .TestQU8();
6203 }
6204 
6205 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_qmin) {
6206   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6207   DeconvolutionOperatorTester()
6208     .input_size(kStridedInputHeight, kStridedInputWidth)
6209     .padding(1)
6210     .kernel_size(3, 3)
6211     .stride(2)
6212     .group_input_channels(23)
6213     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6214     .qmin(128)
6215     .iterations(3)
6216     .TestQU8();
6217 }
6218 
6219 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_with_qmax) {
6220   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6221   DeconvolutionOperatorTester()
6222     .input_size(kStridedInputHeight, kStridedInputWidth)
6223     .padding(1)
6224     .kernel_size(3, 3)
6225     .stride(2)
6226     .group_input_channels(23)
6227     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6228     .qmax(128)
6229     .iterations(3)
6230     .TestQU8();
6231 }
6232 
6233 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_without_bias) {
6234   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6235   DeconvolutionOperatorTester()
6236     .has_bias(false)
6237     .input_size(kStridedInputHeight, kStridedInputWidth)
6238     .padding(1)
6239     .kernel_size(3, 3)
6240     .stride(2)
6241     .group_input_channels(23)
6242     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6243     .iterations(3)
6244     .TestQU8();
6245 }
6246 
6247 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_3x3s2) {
6248   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6249   DeconvolutionOperatorTester()
6250     .input_size(kStridedInputHeight, kStridedInputWidth)
6251     .padding(1)
6252     .kernel_size(3, 3)
6253     .stride(2)
6254     .group_input_channels(15)
6255     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6256     .use_weights_cache(true)
6257     .iterations(3)
6258     .TestQU8();
6259 }
6260 
6261 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
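// Same strided coverage as above, but with groups(2), so each group's weights
// and channels should be handled independently on the subconvolution path.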
6262 
6263 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2) {
6264   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6265   DeconvolutionOperatorTester()
6266     .input_size(kStridedInputHeight, kStridedInputWidth)
6267     .padding(1)
6268     .kernel_size(3, 3)
6269     .stride(2)
6270     .groups(2)
6271     .group_input_channels(17)
6272     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6273     .iterations(3)
6274     .TestQU8();
6275 }
6276 
6277 TEST(DECONVOLUTION_NHWC_QU8, grouped_Kx3s2) {
6278   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6279   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
6280     DeconvolutionOperatorTester()
6281       .input_size(kStridedInputHeight, kStridedInputWidth)
6282       .padding_width(1)
6283       .kernel_size(kernel_height, 3)
6284       .stride(2)
6285       .groups(2)
6286       .group_input_channels(17)
6287       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6288       .iterations(3)
6289       .TestQU8();
6290   }
6291 }
6292 
6293 TEST(DECONVOLUTION_NHWC_QU8, grouped_3xKs2) {
6294   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6295   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6296     DeconvolutionOperatorTester()
6297       .input_size(kStridedInputHeight, kStridedInputWidth)
6298       .padding_height(1)
6299       .kernel_size(3, kernel_width)
6300       .stride(2)
6301       .groups(2)
6302       .group_input_channels(17)
6303       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6304       .iterations(3)
6305       .TestQU8();
6306   }
6307 }
6308 
6309 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3sSx1) {
6310   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6311   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6312     DeconvolutionOperatorTester()
6313       .input_size(kStridedInputHeight, kStridedInputWidth)
6314       .padding(1)
6315       .padding_width(1)
6316       .kernel_size(3, 3)
6317       .stride_height(stride_height)
6318       .groups(2)
6319       .group_input_channels(17)
6320       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6321       .iterations(3)
6322       .TestQU8();
6323   }
6324 }
6325 
6326 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s1xS) {
6327   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6328   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6329     DeconvolutionOperatorTester()
6330       .input_size(kStridedInputHeight, kStridedInputWidth)
6331       .padding(1)
6332       .padding_width(1)
6333       .kernel_size(3, 3)
6334       .stride_width(stride_width)
6335       .groups(2)
6336       .group_input_channels(17)
6337       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6338       .iterations(3)
6339       .TestQU8();
6340   }
6341 }
6342 
6343 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_height_padding) {
6344   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6345   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6346     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6347       DeconvolutionOperatorTester()
6348         .input_size(kStridedInputHeight, kStridedInputWidth)
6349         .padding_width(1)
6350         .padding_top(padding_top)
6351         .padding_bottom(padding_bottom)
6352         .kernel_size(3, 3)
6353         .stride(2)
6354         .groups(2)
6355         .group_input_channels(17)
6356         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6357         .iterations(1)
6358         .TestQU8();
6359     }
6360   }
6361 }
6362 
6363 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_width_padding) {
6364   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6365   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6366     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6367       DeconvolutionOperatorTester()
6368         .input_size(kStridedInputHeight, kStridedInputWidth)
6369         .padding_height(1)
6370         .padding_left(padding_left)
6371         .padding_right(padding_right)
6372         .kernel_size(3, 3)
6373         .stride(2)
6374         .groups(2)
6375         .group_input_channels(17)
6376         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6377         .iterations(1)
6378         .TestQU8();
6379     }
6380   }
6381 }
6382 
6383 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_height_adjustment) {
6384   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6385   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
6386     DeconvolutionOperatorTester()
6387       .input_size(kStridedInputHeight, kStridedInputWidth)
6388       .padding(1)
6389       .adjustment_height(adjustment_height)
6390       .kernel_size(3, 3)
6391       .stride(2)
6392       .groups(2)
6393       .group_input_channels(17)
6394       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6395       .iterations(1)
6396       .TestQU8();
6397   }
6398 }
6399 
6400 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_width_adjustment) {
6401   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6402   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
6403     DeconvolutionOperatorTester()
6404       .input_size(kStridedInputHeight, kStridedInputWidth)
6405       .padding(1)
6406       .adjustment_width(adjustment_width)
6407       .kernel_size(3, 3)
6408       .stride(2)
6409       .groups(2)
6410       .group_input_channels(17)
6411       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6412       .iterations(1)
6413       .TestQU8();
6414   }
6415 }
6416 
6417 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_input_height) {
6418   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6419   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
6420     DeconvolutionOperatorTester()
6421       .input_size(input_height, kStridedInputWidth)
6422       .padding(1)
6423       .kernel_size(3, 3)
6424       .stride(2)
6425       .groups(2)
6426       .group_input_channels(17)
6427       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6428       .iterations(1)
6429       .TestQU8();
6430   }
6431 }
6432 
6433 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_input_width) {
6434   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6435   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
6436     DeconvolutionOperatorTester()
6437       .input_size(kStridedInputHeight, input_width)
6438       .padding(1)
6439       .kernel_size(3, 3)
6440       .stride(2)
6441       .groups(2)
6442       .group_input_channels(17)
6443       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6444       .iterations(1)
6445       .TestQU8();
6446   }
6447 }
6448 
6449 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_input_channels) {
6450   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6451   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
6452     DeconvolutionOperatorTester()
6453       .input_size(kStridedInputHeight, kStridedInputWidth)
6454       .padding(1)
6455       .kernel_size(3, 3)
6456       .stride(2)
6457       .groups(2)
6458       .group_input_channels(input_channels)
6459       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6460       .iterations(1)
6461       .TestQU8();
6462   }
6463 }
6464 
6465 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_varying_output_channels) {
6466   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6467   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
6468     DeconvolutionOperatorTester()
6469       .input_size(kStridedInputHeight, kStridedInputWidth)
6470       .padding(1)
6471       .kernel_size(3, 3)
6472       .stride(2)
6473       .groups(2)
6474       .group_input_channels(17)
6475       .group_output_channels(output_channels)
6476       .iterations(1)
6477       .TestQU8();
6478   }
6479 }
6480 
6481 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_input_stride) {
6482   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6483   DeconvolutionOperatorTester()
6484     .input_size(kStridedInputHeight, kStridedInputWidth)
6485     .padding(1)
6486     .kernel_size(3, 3)
6487     .stride(2)
6488     .groups(2)
6489     .group_input_channels(17)
6490     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6491     .input_pixel_stride(37)
6492     .iterations(3)
6493     .TestQU8();
6494 }
6495 
6496 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_output_stride) {
6497   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6498   DeconvolutionOperatorTester()
6499     .input_size(kStridedInputHeight, kStridedInputWidth)
6500     .padding(1)
6501     .kernel_size(3, 3)
6502     .stride(2)
6503     .groups(2)
6504     .group_input_channels(17)
6505     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
6506     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
6507     .iterations(3)
6508     .TestQU8();
6509 }
6510 
6511 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_qmin) {
6512   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6513   DeconvolutionOperatorTester()
6514     .input_size(kStridedInputHeight, kStridedInputWidth)
6515     .padding(1)
6516     .kernel_size(3, 3)
6517     .stride(2)
6518     .groups(2)
6519     .group_input_channels(17)
6520     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6521     .qmin(128)
6522     .iterations(3)
6523     .TestQU8();
6524 }
6525 
6526 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_with_qmax) {
6527   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6528   DeconvolutionOperatorTester()
6529     .input_size(kStridedInputHeight, kStridedInputWidth)
6530     .padding(1)
6531     .kernel_size(3, 3)
6532     .stride(2)
6533     .groups(2)
6534     .group_input_channels(17)
6535     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6536     .qmax(128)
6537     .iterations(3)
6538     .TestQU8();
6539 }
6540 
6541 TEST(DECONVOLUTION_NHWC_QU8, grouped_3x3s2_without_bias) {
6542   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6543   DeconvolutionOperatorTester()
6544     .has_bias(false)
6545     .input_size(kStridedInputHeight, kStridedInputWidth)
6546     .padding(1)
6547     .kernel_size(3, 3)
6548     .stride(2)
6549     .groups(2)
6550     .group_input_channels(17)
6551     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6552     .iterations(3)
6553     .TestQU8();
6554 }
6555 
6556 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_grouped_3x3s2) {
6557   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6558   DeconvolutionOperatorTester()
6559     .input_size(kStridedInputHeight, kStridedInputWidth)
6560     .padding(1)
6561     .kernel_size(3, 3)
6562     .stride(2)
6563     .groups(2)
6564     .group_input_channels(17)
6565     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6566     .use_weights_cache(true)
6567     .iterations(3)
6568     .TestQU8();
6569 }
6570 
6571 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
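// Strided coverage repeated with batch_size(2); the per-test parameter sweeps
// mirror the single-batch SUBCONV2D/IGEMM tests above.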
6572 
6573 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2) {
6574   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6575   DeconvolutionOperatorTester()
6576     .batch_size(2)
6577     .input_size(kStridedInputHeight, kStridedInputWidth)
6578     .padding(1)
6579     .kernel_size(3, 3)
6580     .stride(2)
6581     .group_input_channels(15)
6582     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6583     .iterations(3)
6584     .TestQU8();
6585 }
6586 
6587 TEST(DECONVOLUTION_NHWC_QU8, batched_Kx3s2) {
6588   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6589   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
6590     DeconvolutionOperatorTester()
6591       .batch_size(2)
6592       .input_size(kStridedInputHeight, kStridedInputWidth)
6593       .padding_width(1)
6594       .kernel_size(kernel_height, 3)
6595       .stride(2)
6596       .group_input_channels(17)
6597       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6598       .iterations(3)
6599       .TestQU8();
6600   }
6601 }
6602 
6603 TEST(DECONVOLUTION_NHWC_QU8, batched_3xKs2) {
6604   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6605   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6606     DeconvolutionOperatorTester()
6607       .batch_size(2)
6608       .input_size(kStridedInputHeight, kStridedInputWidth)
6609       .padding_height(1)
6610       .kernel_size(3, kernel_width)
6611       .stride(2)
6612       .group_input_channels(17)
6613       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6614       .iterations(3)
6615       .TestQU8();
6616   }
6617 }
6618 
6619 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3sSx1) {
6620   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6621   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6622     DeconvolutionOperatorTester()
6623       .batch_size(2)
6624       .input_size(kStridedInputHeight, kStridedInputWidth)
6625       .padding(1)
6626       .padding_width(1)
6627       .kernel_size(3, 3)
6628       .stride_height(stride_height)
6629       .group_input_channels(17)
6630       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6631       .iterations(3)
6632       .TestQU8();
6633   }
6634 }
6635 
6636 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s1xS) {
6637   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6638   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6639     DeconvolutionOperatorTester()
6640       .batch_size(2)
6641       .input_size(kStridedInputHeight, kStridedInputWidth)
6642       .padding(1)
6643       .padding_width(1)
6644       .kernel_size(3, 3)
6645       .stride_width(stride_width)
6646       .group_input_channels(17)
6647       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6648       .iterations(3)
6649       .TestQU8();
6650   }
6651 }
6652 
6653 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_height_padding) {
6654   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6655   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6656     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6657       DeconvolutionOperatorTester()
6658         .batch_size(2)
6659         .input_size(kStridedInputHeight, kStridedInputWidth)
6660         .padding_width(1)
6661         .padding_top(padding_top)
6662         .padding_bottom(padding_bottom)
6663         .kernel_size(3, 3)
6664         .stride(2)
6665         .group_input_channels(15)
6666         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6667         .iterations(1)
6668         .TestQU8();
6669     }
6670   }
6671 }
6672 
6673 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_width_padding) {
6674   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6675   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6676     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6677       DeconvolutionOperatorTester()
6678         .batch_size(2)
6679         .input_size(kStridedInputHeight, kStridedInputWidth)
6680         .padding_height(1)
6681         .padding_left(padding_left)
6682         .padding_right(padding_right)
6683         .kernel_size(3, 3)
6684         .stride(2)
6685         .group_input_channels(15)
6686         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6687         .iterations(1)
6688         .TestQU8();
6689     }
6690   }
6691 }
6692 
6693 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_height_adjustment) {
6694   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6695   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
6696     DeconvolutionOperatorTester()
6697       .batch_size(2)
6698       .input_size(kStridedInputHeight, kStridedInputWidth)
6699       .padding(1)
6700       .adjustment_height(adjustment_height)
6701       .kernel_size(3, 3)
6702       .stride(2)
6703       .group_input_channels(15)
6704       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6705       .iterations(1)
6706       .TestQU8();
6707   }
6708 }
6709 
6710 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_width_adjustment) {
6711   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6712   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
6713     DeconvolutionOperatorTester()
6714       .batch_size(2)
6715       .input_size(kStridedInputHeight, kStridedInputWidth)
6716       .padding(1)
6717       .adjustment_width(adjustment_width)
6718       .kernel_size(3, 3)
6719       .stride(2)
6720       .group_input_channels(15)
6721       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6722       .iterations(1)
6723       .TestQU8();
6724   }
6725 }
6726 
6727 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_input_height) {
6728   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6729   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
6730     DeconvolutionOperatorTester()
6731       .batch_size(2)
6732       .input_size(input_height, kStridedInputWidth)
6733       .padding(1)
6734       .kernel_size(3, 3)
6735       .stride(2)
6736       .group_input_channels(15)
6737       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6738       .iterations(1)
6739       .TestQU8();
6740   }
6741 }
6742 
6743 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_input_width) {
6744   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6745   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
6746     DeconvolutionOperatorTester()
6747       .batch_size(2)
6748       .input_size(kStridedInputHeight, input_width)
6749       .padding(1)
6750       .kernel_size(3, 3)
6751       .stride(2)
6752       .group_input_channels(15)
6753       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6754       .iterations(1)
6755       .TestQU8();
6756   }
6757 }
6758 
6759 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_input_channels) {
6760   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6761   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
6762     DeconvolutionOperatorTester()
6763       .batch_size(2)
6764       .input_size(kStridedInputHeight, kStridedInputWidth)
6765       .padding(1)
6766       .kernel_size(3, 3)
6767       .stride(2)
6768       .group_input_channels(input_channels)
6769       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6770       .iterations(1)
6771       .TestQU8();
6772   }
6773 }
6774 
6775 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_varying_output_channels) {
6776   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6777   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
6778     DeconvolutionOperatorTester()
6779       .batch_size(2)
6780       .input_size(kStridedInputHeight, kStridedInputWidth)
6781       .padding(1)
6782       .kernel_size(3, 3)
6783       .stride(2)
6784       .group_input_channels(23)
6785       .group_output_channels(output_channels)
6786       .iterations(1)
6787       .TestQU8();
6788   }
6789 }
6790 
6791 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_input_stride) {
6792   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6793   DeconvolutionOperatorTester()
6794     .batch_size(2)
6795     .input_size(kStridedInputHeight, kStridedInputWidth)
6796     .padding(1)
6797     .kernel_size(3, 3)
6798     .stride(2)
6799     .group_input_channels(23)
6800     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6801     .input_pixel_stride(28)
6802     .iterations(3)
6803     .TestQU8();
6804 }
6805 
6806 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_output_stride) {
6807   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6808   DeconvolutionOperatorTester()
6809     .batch_size(2)
6810     .input_size(kStridedInputHeight, kStridedInputWidth)
6811     .padding(1)
6812     .kernel_size(3, 3)
6813     .stride(2)
6814     .group_input_channels(23)
6815     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6816     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
6817     .iterations(3)
6818     .TestQU8();
6819 }
6820 
6821 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_qmin) {
6822   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6823   DeconvolutionOperatorTester()
6824     .batch_size(2)
6825     .input_size(kStridedInputHeight, kStridedInputWidth)
6826     .padding(1)
6827     .kernel_size(3, 3)
6828     .stride(2)
6829     .group_input_channels(23)
6830     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6831     .qmin(128)
6832     .iterations(3)
6833     .TestQU8();
6834 }
6835 
6836 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_with_qmax) {
6837   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6838   DeconvolutionOperatorTester()
6839     .batch_size(2)
6840     .input_size(kStridedInputHeight, kStridedInputWidth)
6841     .padding(1)
6842     .kernel_size(3, 3)
6843     .stride(2)
6844     .group_input_channels(23)
6845     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6846     .qmax(128)
6847     .iterations(3)
6848     .TestQU8();
6849 }
6850 
6851 TEST(DECONVOLUTION_NHWC_QU8, batched_3x3s2_without_bias) {
6852   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6853   DeconvolutionOperatorTester()
6854     .has_bias(false)
6855     .batch_size(2)
6856     .input_size(kStridedInputHeight, kStridedInputWidth)
6857     .padding(1)
6858     .kernel_size(3, 3)
6859     .stride(2)
6860     .group_input_channels(23)
6861     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6862     .iterations(3)
6863     .TestQU8();
6864 }
6865 
6866 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_3x3s2) {
6867   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6868   DeconvolutionOperatorTester()
6869     .batch_size(2)
6870     .input_size(kStridedInputHeight, kStridedInputWidth)
6871     .padding(1)
6872     .kernel_size(3, 3)
6873     .stride(2)
6874     .group_input_channels(15)
6875     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6876     .use_weights_cache(true)
6877     .iterations(3)
6878     .TestQU8();
6879 }
6880 
6881 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
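// Strided coverage with both groups(2) and batch_size(2), combining the grouped
// and batched sweeps above on the subconvolution path.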
6882 
6883 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2) {
6884   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6885   DeconvolutionOperatorTester()
6886     .batch_size(2)
6887     .input_size(kStridedInputHeight, kStridedInputWidth)
6888     .padding(1)
6889     .kernel_size(3, 3)
6890     .stride(2)
6891     .groups(2)
6892     .group_input_channels(17)
6893     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6894     .iterations(3)
6895     .TestQU8();
6896 }
6897 
6898 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_Kx3s2) {
6899   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6900   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
6901     DeconvolutionOperatorTester()
6902       .batch_size(2)
6903       .input_size(kStridedInputHeight, kStridedInputWidth)
6904       .padding_width(1)
6905       .kernel_size(kernel_height, 3)
6906       .stride(2)
6907       .groups(2)
6908       .group_input_channels(17)
6909       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6910       .iterations(3)
6911       .TestQU8();
6912   }
6913 }
6914 
6915 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3xKs2) {
6916   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6917   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
6918     DeconvolutionOperatorTester()
6919       .batch_size(2)
6920       .input_size(kStridedInputHeight, kStridedInputWidth)
6921       .padding_height(1)
6922       .kernel_size(3, kernel_width)
6923       .stride(2)
6924       .groups(2)
6925       .group_input_channels(17)
6926       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6927       .iterations(3)
6928       .TestQU8();
6929   }
6930 }
6931 
6932 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3sSx1) {
6933   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6934   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
6935     DeconvolutionOperatorTester()
6936       .batch_size(2)
6937       .input_size(kStridedInputHeight, kStridedInputWidth)
6938       .padding(1)
6939       .padding_width(1)
6940       .kernel_size(3, 3)
6941       .stride_height(stride_height)
6942       .groups(2)
6943       .group_input_channels(17)
6944       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6945       .iterations(3)
6946       .TestQU8();
6947   }
6948 }
6949 
6950 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s1xS) {
6951   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6952   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
6953     DeconvolutionOperatorTester()
6954       .batch_size(2)
6955       .input_size(kStridedInputHeight, kStridedInputWidth)
6956       .padding(1)
6957       .padding_width(1)
6958       .kernel_size(3, 3)
6959       .stride_width(stride_width)
6960       .groups(2)
6961       .group_input_channels(17)
6962       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6963       .iterations(3)
6964       .TestQU8();
6965   }
6966 }
6967 
6968 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_height_padding) {
6969   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6970   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
6971     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
6972       DeconvolutionOperatorTester()
6973         .batch_size(2)
6974         .input_size(kStridedInputHeight, kStridedInputWidth)
6975         .padding_width(1)
6976         .padding_top(padding_top)
6977         .padding_bottom(padding_bottom)
6978         .kernel_size(3, 3)
6979         .stride(2)
6980         .groups(2)
6981         .group_input_channels(17)
6982         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
6983         .iterations(1)
6984         .TestQU8();
6985     }
6986   }
6987 }
6988 
6989 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_width_padding) {
6990   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
6991   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
6992     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
6993       DeconvolutionOperatorTester()
6994         .batch_size(2)
6995         .input_size(kStridedInputHeight, kStridedInputWidth)
6996         .padding_height(1)
6997         .padding_left(padding_left)
6998         .padding_right(padding_right)
6999         .kernel_size(3, 3)
7000         .stride(2)
7001         .groups(2)
7002         .group_input_channels(17)
7003         .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7004         .iterations(1)
7005         .TestQU8();
7006     }
7007   }
7008 }
7009 
7010 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_height_adjustment) {
7011   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7012   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
7013     DeconvolutionOperatorTester()
7014       .batch_size(2)
7015       .input_size(kStridedInputHeight, kStridedInputWidth)
7016       .padding(1)
7017       .adjustment_height(adjustment_height)
7018       .kernel_size(3, 3)
7019       .stride(2)
7020       .groups(2)
7021       .group_input_channels(17)
7022       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7023       .iterations(1)
7024       .TestQU8();
7025   }
7026 }
7027 
7028 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_width_adjustment) {
7029   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7030   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
7031     DeconvolutionOperatorTester()
7032       .batch_size(2)
7033       .input_size(kStridedInputHeight, kStridedInputWidth)
7034       .padding(1)
7035       .adjustment_width(adjustment_width)
7036       .kernel_size(3, 3)
7037       .stride(2)
7038       .groups(2)
7039       .group_input_channels(17)
7040       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7041       .iterations(1)
7042       .TestQU8();
7043   }
7044 }
7045 
7046 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_input_height) {
7047   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7048   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7049     DeconvolutionOperatorTester()
7050       .batch_size(2)
7051       .input_size(input_height, kStridedInputWidth)
7052       .padding(1)
7053       .kernel_size(3, 3)
7054       .stride(2)
7055       .groups(2)
7056       .group_input_channels(17)
7057       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7058       .iterations(1)
7059       .TestQU8();
7060   }
7061 }
7062 
7063 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_input_width) {
7064   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7065   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7066     DeconvolutionOperatorTester()
7067       .batch_size(2)
7068       .input_size(kStridedInputHeight, input_width)
7069       .padding(1)
7070       .kernel_size(3, 3)
7071       .stride(2)
7072       .groups(2)
7073       .group_input_channels(17)
7074       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7075       .iterations(1)
7076       .TestQU8();
7077   }
7078 }
7079 
7080 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_input_channels) {
7081   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7082   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
7083     DeconvolutionOperatorTester()
7084       .batch_size(2)
7085       .input_size(kStridedInputHeight, kStridedInputWidth)
7086       .padding(1)
7087       .kernel_size(3, 3)
7088       .stride(2)
7089       .groups(2)
7090       .group_input_channels(input_channels)
7091       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7092       .iterations(1)
7093       .TestQU8();
7094   }
7095 }
7096 
7097 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_varying_output_channels) {
7098   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7099   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7100     DeconvolutionOperatorTester()
7101       .batch_size(2)
7102       .input_size(kStridedInputHeight, kStridedInputWidth)
7103       .padding(1)
7104       .kernel_size(3, 3)
7105       .stride(2)
7106       .groups(2)
7107       .group_input_channels(17)
7108       .group_output_channels(output_channels)
7109       .iterations(1)
7110       .TestQU8();
7111   }
7112 }
7113 
7114 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_input_stride) {
7115   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7116   DeconvolutionOperatorTester()
7117     .batch_size(2)
7118     .input_size(kStridedInputHeight, kStridedInputWidth)
7119     .padding(1)
7120     .kernel_size(3, 3)
7121     .stride(2)
7122     .groups(2)
7123     .group_input_channels(17)
7124     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7125     .input_pixel_stride(37)
7126     .iterations(3)
7127     .TestQU8();
7128 }
7129 
7130 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_output_stride) {
7131   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7132   DeconvolutionOperatorTester()
7133     .batch_size(2)
7134     .input_size(kStridedInputHeight, kStridedInputWidth)
7135     .padding(1)
7136     .kernel_size(3, 3)
7137     .stride(2)
7138     .groups(2)
7139     .group_input_channels(17)
7140     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
7141     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7142     .iterations(3)
7143     .TestQU8();
7144 }
7145 
7146 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_qmin) {
7147   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7148   DeconvolutionOperatorTester()
7149     .batch_size(2)
7150     .input_size(kStridedInputHeight, kStridedInputWidth)
7151     .padding(1)
7152     .kernel_size(3, 3)
7153     .stride(2)
7154     .groups(2)
7155     .group_input_channels(17)
7156     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7157     .qmin(128)
7158     .iterations(3)
7159     .TestQU8();
7160 }
7161 
7162 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_with_qmax) {
7163   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7164   DeconvolutionOperatorTester()
7165     .batch_size(2)
7166     .input_size(kStridedInputHeight, kStridedInputWidth)
7167     .padding(1)
7168     .kernel_size(3, 3)
7169     .stride(2)
7170     .groups(2)
7171     .group_input_channels(17)
7172     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7173     .qmax(128)
7174     .iterations(3)
7175     .TestQU8();
7176 }
7177 
7178 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_3x3s2_without_bias) {
7179   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7180   DeconvolutionOperatorTester()
7181     .has_bias(false)
7182     .batch_size(2)
7183     .input_size(kStridedInputHeight, kStridedInputWidth)
7184     .padding(1)
7185     .kernel_size(3, 3)
7186     .stride(2)
7187     .groups(2)
7188     .group_input_channels(17)
7189     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7190     .iterations(3)
7191     .TestQU8();
7192 }
7193 
7194 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_grouped_3x3s2) {
7195   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7196   DeconvolutionOperatorTester()
7197     .batch_size(2)
7198     .input_size(kStridedInputHeight, kStridedInputWidth)
7199     .padding(1)
7200     .kernel_size(3, 3)
7201     .stride(2)
7202     .groups(2)
7203     .group_input_channels(17)
7204     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7205     .use_weights_cache(true)
7206     .iterations(3)
7207     .TestQU8();
7208 }
7209 
7210 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
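// The *_setup_* tests below build one operator and then run it again with a different batch size
// (next_batch_size), input height (next_input_height), or input width (next_input_width);
// TestSetupQU8() presumably re-invokes setup on the same operator and verifies the results of
// both runs.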
7211 
7212 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_setup_changing_batch) {
7213   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7214   DeconvolutionOperatorTester()
7215     .batch_size(2)
7216     .next_batch_size(5)
7217     .input_size(kStridedInputHeight, kStridedInputWidth)
7218     .kernel_height(3)
7219     .kernel_width(5)
7220     .stride(2)
7221     .groups(2)
7222     .group_input_channels(15)
7223     .group_output_channels(17)
7224     .TestSetupQU8();
7225 }
7226 
7227 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_setup_changing_height) {
7228   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7229   DeconvolutionOperatorTester()
7230     .batch_size(2)
7231     .input_size(kStridedInputHeight, kStridedInputWidth)
7232     .next_input_height(kStridedInputHeight + 3)
7233     .kernel_height(3)
7234     .kernel_width(5)
7235     .stride(2)
7236     .groups(2)
7237     .group_input_channels(15)
7238     .group_output_channels(17)
7239     .TestSetupQU8();
7240 }
7241 
7242 TEST(DECONVOLUTION_NHWC_QU8, 3x3s2_setup_changing_width) {
7243   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7244   DeconvolutionOperatorTester()
7245     .batch_size(2)
7246     .input_size(kStridedInputHeight, kStridedInputWidth)
7247     .next_input_width(kStridedInputWidth + 3)
7248     .kernel_height(3)
7249     .kernel_width(5)
7250     .stride(2)
7251     .groups(2)
7252     .group_input_channels(15)
7253     .group_output_channels(17)
7254     .TestSetupQU8();
7255 }
7256 
7257 /**************************** SUBCONV2D/GEMM path ****************************/
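// In this section the kernel size matches the stride (2x2 with stride 2, plus Kx2/2xK kernels
// with per-dimension strides equal to the kernel), so kernel taps never overlap in the output:
// each output pixel is produced by exactly one input pixel and one kernel tap. Every
// subconvolution is then a plain GEMM with no accumulation across taps, which is presumably why
// these cases are labelled SUBCONV2D/GEMM rather than SUBCONV2D/IGEMM.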
7258 
7259 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2) {
7260   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7261   DeconvolutionOperatorTester()
7262     .input_size(kStridedInputHeight, kStridedInputWidth)
7263     .kernel_size(2, 2)
7264     .stride(2)
7265     .group_input_channels(15)
7266     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7267     .iterations(3)
7268     .TestQU8();
7269 }
7270 
7271 TEST(DECONVOLUTION_NHWC_QU8, Kx2sKx2) {
7272   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7273   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7274     DeconvolutionOperatorTester()
7275       .input_size(kStridedInputHeight, kStridedInputWidth)
7276       .kernel_size(kernel_height, 2)
7277       .stride(kernel_height, 2)
7278       .group_input_channels(17)
7279       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7280       .iterations(3)
7281       .TestQU8();
7282   }
7283 }
7284 
7285 TEST(DECONVOLUTION_NHWC_QU8, 2xKs2xK) {
7286   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7287   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7288     DeconvolutionOperatorTester()
7289       .input_size(kStridedInputHeight, kStridedInputWidth)
7290       .kernel_size(2, kernel_width)
7291       .stride(2, kernel_width)
7292       .group_input_channels(17)
7293       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7294       .iterations(3)
7295       .TestQU8();
7296   }
7297 }
7298 
7299 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_height_adjustment) {
7300   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7301   DeconvolutionOperatorTester()
7302     .input_size(kStridedInputHeight, kStridedInputWidth)
7303     .adjustment_height(1)
7304     .kernel_size(2, 2)
7305     .stride(2)
7306     .group_input_channels(15)
7307     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7308     .iterations(1)
7309     .TestQU8();
7310 }
7311 
7312 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_width_adjustment) {
7313   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7314   DeconvolutionOperatorTester()
7315     .input_size(kStridedInputHeight, kStridedInputWidth)
7316     .adjustment_width(1)
7317     .kernel_size(2, 2)
7318     .stride(2)
7319     .group_input_channels(15)
7320     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7321     .iterations(1)
7322     .TestQU8();
7323 }
7324 
7325 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_input_height) {
7326   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7327   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7328     DeconvolutionOperatorTester()
7329       .input_size(input_height, kStridedInputWidth)
7330       .kernel_size(2, 2)
7331       .stride(2)
7332       .group_input_channels(15)
7333       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7334       .iterations(1)
7335       .TestQU8();
7336   }
7337 }
7338 
7339 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_input_width) {
7340   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7341   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7342     DeconvolutionOperatorTester()
7343       .input_size(kStridedInputHeight, input_width)
7344       .kernel_size(2, 2)
7345       .stride(2)
7346       .group_input_channels(15)
7347       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7348       .iterations(1)
7349       .TestQU8();
7350   }
7351 }
7352 
7353 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_input_channels) {
7354   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7355   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
7356     DeconvolutionOperatorTester()
7357       .input_size(kStridedInputHeight, kStridedInputWidth)
7358       .kernel_size(2, 2)
7359       .stride(2)
7360       .group_input_channels(input_channels)
7361       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7362       .iterations(1)
7363       .TestQU8();
7364   }
7365 }
7366 
7367 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_varying_output_channels) {
7368   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7369   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7370     DeconvolutionOperatorTester()
7371       .input_size(kStridedInputHeight, kStridedInputWidth)
7372       .kernel_size(2, 2)
7373       .stride(2)
7374       .group_input_channels(23)
7375       .group_output_channels(output_channels)
7376       .iterations(1)
7377       .TestQU8();
7378   }
7379 }
7380 
7381 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_input_stride) {
7382   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7383   DeconvolutionOperatorTester()
7384     .input_size(kStridedInputHeight, kStridedInputWidth)
7385     .kernel_size(2, 2)
7386     .stride(2)
7387     .group_input_channels(23)
7388     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7389     .input_pixel_stride(28)
7390     .iterations(3)
7391     .TestQU8();
7392 }
7393 
7394 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_output_stride) {
7395   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7396   DeconvolutionOperatorTester()
7397     .input_size(kStridedInputHeight, kStridedInputWidth)
7398     .kernel_size(2, 2)
7399     .stride(2)
7400     .group_input_channels(23)
7401     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7402     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7403     .iterations(3)
7404     .TestQU8();
7405 }
7406 
7407 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_qmin) {
7408   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7409   DeconvolutionOperatorTester()
7410     .input_size(kStridedInputHeight, kStridedInputWidth)
7411     .kernel_size(2, 2)
7412     .stride(2)
7413     .group_input_channels(23)
7414     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7415     .qmin(128)
7416     .iterations(3)
7417     .TestQU8();
7418 }
7419 
7420 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_with_qmax) {
7421   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7422   DeconvolutionOperatorTester()
7423     .input_size(kStridedInputHeight, kStridedInputWidth)
7424     .kernel_size(2, 2)
7425     .stride(2)
7426     .group_input_channels(23)
7427     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7428     .qmax(128)
7429     .iterations(3)
7430     .TestQU8();
7431 }
7432 
7433 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_without_bias) {
7434   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7435   DeconvolutionOperatorTester()
7436     .has_bias(false)
7437     .input_size(kStridedInputHeight, kStridedInputWidth)
7438     .kernel_size(2, 2)
7439     .stride(2)
7440     .group_input_channels(23)
7441     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7442     .iterations(3)
7443     .TestQU8();
7444 }
7445 
7446 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_2x2s2) {
7447   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7448   DeconvolutionOperatorTester()
7449     .input_size(kStridedInputHeight, kStridedInputWidth)
7450     .kernel_size(2, 2)
7451     .stride(2)
7452     .group_input_channels(15)
7453     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7454     .use_weights_cache(true)
7455     .iterations(3)
7456     .TestQU8();
7457 }
7458 
7459 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
7460 
7461 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2) {
7462   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7463   DeconvolutionOperatorTester()
7464     .input_size(kStridedInputHeight, kStridedInputWidth)
7465     .kernel_size(2, 2)
7466     .stride(2)
7467     .groups(2)
7468     .group_input_channels(17)
7469     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7470     .iterations(3)
7471     .TestQU8();
7472 }
7473 
7474 TEST(DECONVOLUTION_NHWC_QU8, grouped_Kx2sKx2) {
7475   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7476   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7477     DeconvolutionOperatorTester()
7478       .input_size(kStridedInputHeight, kStridedInputWidth)
7479       .kernel_size(kernel_height, 2)
7480       .stride(kernel_height, 2)
7481       .groups(2)
7482       .group_input_channels(17)
7483       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7484       .iterations(3)
7485       .TestQU8();
7486   }
7487 }
7488 
7489 TEST(DECONVOLUTION_NHWC_QU8, grouped_2xKs2xK) {
7490   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7491   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7492     DeconvolutionOperatorTester()
7493       .input_size(kStridedInputHeight, kStridedInputWidth)
7494       .kernel_size(2, kernel_width)
7495       .stride(2, kernel_width)
7496       .groups(2)
7497       .group_input_channels(17)
7498       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7499       .iterations(3)
7500       .TestQU8();
7501   }
7502 }
7503 
7504 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_height_adjustment) {
7505   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7506   DeconvolutionOperatorTester()
7507     .input_size(kStridedInputHeight, kStridedInputWidth)
7508     .adjustment_height(1)
7509     .kernel_size(2, 2)
7510     .stride(2)
7511     .groups(2)
7512     .group_input_channels(17)
7513     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7514     .iterations(1)
7515     .TestQU8();
7516 }
7517 
7518 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_width_adjustment) {
7519   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7520   DeconvolutionOperatorTester()
7521     .input_size(kStridedInputHeight, kStridedInputWidth)
7522     .adjustment_width(1)
7523     .kernel_size(2, 2)
7524     .stride(2)
7525     .groups(2)
7526     .group_input_channels(17)
7527     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7528     .iterations(1)
7529     .TestQU8();
7530 }
7531 
7532 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_input_height) {
7533   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7534   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7535     DeconvolutionOperatorTester()
7536       .input_size(input_height, kStridedInputWidth)
7537       .kernel_size(2, 2)
7538       .stride(2)
7539       .groups(2)
7540       .group_input_channels(17)
7541       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7542       .iterations(1)
7543       .TestQU8();
7544   }
7545 }
7546 
7547 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_input_width) {
7548   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7549   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7550     DeconvolutionOperatorTester()
7551       .input_size(kStridedInputHeight, input_width)
7552       .kernel_size(2, 2)
7553       .stride(2)
7554       .groups(2)
7555       .group_input_channels(17)
7556       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7557       .iterations(1)
7558       .TestQU8();
7559   }
7560 }
7561 
7562 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_input_channels) {
7563   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7564   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
7565     DeconvolutionOperatorTester()
7566       .input_size(kStridedInputHeight, kStridedInputWidth)
7567       .kernel_size(2, 2)
7568       .stride(2)
7569       .groups(2)
7570       .group_input_channels(input_channels)
7571       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7572       .iterations(1)
7573       .TestQU8();
7574   }
7575 }
7576 
7577 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_varying_output_channels) {
7578   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7579   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7580     DeconvolutionOperatorTester()
7581       .input_size(kStridedInputHeight, kStridedInputWidth)
7582       .kernel_size(2, 2)
7583       .stride(2)
7584       .groups(2)
7585       .group_input_channels(17)
7586       .group_output_channels(output_channels)
7587       .iterations(1)
7588       .TestQU8();
7589   }
7590 }
7591 
7592 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_input_stride) {
7593   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7594   DeconvolutionOperatorTester()
7595     .input_size(kStridedInputHeight, kStridedInputWidth)
7596     .kernel_size(2, 2)
7597     .stride(2)
7598     .groups(2)
7599     .group_input_channels(17)
7600     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7601     .input_pixel_stride(37)
7602     .iterations(3)
7603     .TestQU8();
7604 }
7605 
7606 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_output_stride) {
7607   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7608   DeconvolutionOperatorTester()
7609     .input_size(kStridedInputHeight, kStridedInputWidth)
7610     .kernel_size(2, 2)
7611     .stride(2)
7612     .groups(2)
7613     .group_input_channels(17)
7614     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
7615     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7616     .iterations(3)
7617     .TestQU8();
7618 }
7619 
7620 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_qmin) {
7621   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7622   DeconvolutionOperatorTester()
7623     .input_size(kStridedInputHeight, kStridedInputWidth)
7624     .kernel_size(2, 2)
7625     .stride(2)
7626     .groups(2)
7627     .group_input_channels(17)
7628     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7629     .qmin(128)
7630     .iterations(3)
7631     .TestQU8();
7632 }
7633 
7634 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_with_qmax) {
7635   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7636   DeconvolutionOperatorTester()
7637     .input_size(kStridedInputHeight, kStridedInputWidth)
7638     .kernel_size(2, 2)
7639     .stride(2)
7640     .groups(2)
7641     .group_input_channels(17)
7642     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7643     .qmax(128)
7644     .iterations(3)
7645     .TestQU8();
7646 }
7647 
7648 TEST(DECONVOLUTION_NHWC_QU8, grouped_2x2s2_without_bias) {
7649   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7650   DeconvolutionOperatorTester()
7651     .has_bias(false)
7652     .input_size(kStridedInputHeight, kStridedInputWidth)
7653     .kernel_size(2, 2)
7654     .stride(2)
7655     .groups(2)
7656     .group_input_channels(17)
7657     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7658     .iterations(3)
7659     .TestQU8();
7660 }
7661 
7662 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_grouped_2x2s2) {
7663   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7664   DeconvolutionOperatorTester()
7665     .input_size(kStridedInputHeight, kStridedInputWidth)
7666     .kernel_size(2, 2)
7667     .stride(2)
7668     .groups(2)
7669     .group_input_channels(17)
7670     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7671     .use_weights_cache(true)
7672     .iterations(3)
7673     .TestQU8();
7674 }
7675 
7676 /**************************** SUBCONV2D/GEMM path, batched ****************************/
7677 
7678 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2) {
7679   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7680   DeconvolutionOperatorTester()
7681     .batch_size(2)
7682     .input_size(kStridedInputHeight, kStridedInputWidth)
7683     .kernel_size(2, 2)
7684     .stride(2)
7685     .group_input_channels(15)
7686     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7687     .iterations(3)
7688     .TestQU8();
7689 }
7690 
7691 TEST(DECONVOLUTION_NHWC_QU8, batched_Kx2sKx2) {
7692   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7693   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7694     DeconvolutionOperatorTester()
7695       .batch_size(2)
7696       .input_size(kStridedInputHeight, kStridedInputWidth)
7697       .kernel_size(kernel_height, 2)
7698       .stride(kernel_height, 2)
7699       .group_input_channels(17)
7700       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7701       .iterations(3)
7702       .TestQU8();
7703   }
7704 }
7705 
7706 TEST(DECONVOLUTION_NHWC_QU8, batched_2xKs2xK) {
7707   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7708   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7709     DeconvolutionOperatorTester()
7710       .batch_size(2)
7711       .input_size(kStridedInputHeight, kStridedInputWidth)
7712       .kernel_size(2, kernel_width)
7713       .stride(2, kernel_width)
7714       .group_input_channels(17)
7715       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7716       .iterations(3)
7717       .TestQU8();
7718   }
7719 }
7720 
7721 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_height_adjustment) {
7722   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7723   DeconvolutionOperatorTester()
7724     .batch_size(2)
7725     .input_size(kStridedInputHeight, kStridedInputWidth)
7726     .adjustment_height(1)
7727     .kernel_size(2, 2)
7728     .stride(2)
7729     .group_input_channels(15)
7730     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7731     .iterations(1)
7732     .TestQU8();
7733 }
7734 
7735 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_width_adjustment) {
7736   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7737   DeconvolutionOperatorTester()
7738     .batch_size(2)
7739     .input_size(kStridedInputHeight, kStridedInputWidth)
7740     .adjustment_width(1)
7741     .kernel_size(2, 2)
7742     .stride(2)
7743     .group_input_channels(15)
7744     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7745     .iterations(1)
7746     .TestQU8();
7747 }
7748 
7749 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_input_height) {
7750   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7751   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7752     DeconvolutionOperatorTester()
7753       .batch_size(2)
7754       .input_size(input_height, kStridedInputWidth)
7755       .kernel_size(2, 2)
7756       .stride(2)
7757       .group_input_channels(15)
7758       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7759       .iterations(1)
7760       .TestQU8();
7761   }
7762 }
7763 
7764 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_input_width) {
7765   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7766   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7767     DeconvolutionOperatorTester()
7768       .batch_size(2)
7769       .input_size(kStridedInputHeight, input_width)
7770       .kernel_size(2, 2)
7771       .stride(2)
7772       .group_input_channels(15)
7773       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7774       .iterations(1)
7775       .TestQU8();
7776   }
7777 }
7778 
7779 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_input_channels) {
7780   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7781   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
7782     DeconvolutionOperatorTester()
7783       .batch_size(2)
7784       .input_size(kStridedInputHeight, kStridedInputWidth)
7785       .kernel_size(2, 2)
7786       .stride(2)
7787       .group_input_channels(input_channels)
7788       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7789       .iterations(1)
7790       .TestQU8();
7791   }
7792 }
7793 
7794 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_varying_output_channels) {
7795   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7796   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
7797     DeconvolutionOperatorTester()
7798       .batch_size(2)
7799       .input_size(kStridedInputHeight, kStridedInputWidth)
7800       .kernel_size(2, 2)
7801       .stride(2)
7802       .group_input_channels(23)
7803       .group_output_channels(output_channels)
7804       .iterations(1)
7805       .TestQU8();
7806   }
7807 }
7808 
7809 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_input_stride) {
7810   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7811   DeconvolutionOperatorTester()
7812     .batch_size(2)
7813     .input_size(kStridedInputHeight, kStridedInputWidth)
7814     .kernel_size(2, 2)
7815     .stride(2)
7816     .group_input_channels(23)
7817     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7818     .input_pixel_stride(28)
7819     .iterations(3)
7820     .TestQU8();
7821 }
7822 
7823 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_output_stride) {
7824   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7825   DeconvolutionOperatorTester()
7826     .batch_size(2)
7827     .input_size(kStridedInputHeight, kStridedInputWidth)
7828     .kernel_size(2, 2)
7829     .stride(2)
7830     .group_input_channels(23)
7831     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7832     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
7833     .iterations(3)
7834     .TestQU8();
7835 }
7836 
7837 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_qmin) {
7838   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7839   DeconvolutionOperatorTester()
7840     .batch_size(2)
7841     .input_size(kStridedInputHeight, kStridedInputWidth)
7842     .kernel_size(2, 2)
7843     .stride(2)
7844     .group_input_channels(23)
7845     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7846     .qmin(128)
7847     .iterations(3)
7848     .TestQU8();
7849 }
7850 
7851 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_with_qmax) {
7852   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7853   DeconvolutionOperatorTester()
7854     .batch_size(2)
7855     .input_size(kStridedInputHeight, kStridedInputWidth)
7856     .kernel_size(2, 2)
7857     .stride(2)
7858     .group_input_channels(23)
7859     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7860     .qmax(128)
7861     .iterations(3)
7862     .TestQU8();
7863 }
7864 
7865 TEST(DECONVOLUTION_NHWC_QU8, batched_2x2s2_without_bias) {
7866   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7867   DeconvolutionOperatorTester()
7868     .has_bias(false)
7869     .batch_size(2)
7870     .input_size(kStridedInputHeight, kStridedInputWidth)
7871     .kernel_size(2, 2)
7872     .stride(2)
7873     .group_input_channels(23)
7874     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7875     .iterations(3)
7876     .TestQU8();
7877 }
7878 
7879 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_2x2s2) {
7880   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7881   DeconvolutionOperatorTester()
7882     .batch_size(2)
7883     .input_size(kStridedInputHeight, kStridedInputWidth)
7884     .kernel_size(2, 2)
7885     .stride(2)
7886     .group_input_channels(15)
7887     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7888     .use_weights_cache(true)
7889     .iterations(3)
7890     .TestQU8();
7891 }
7892 
7893 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
7894 
7895 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2) {
7896   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7897   DeconvolutionOperatorTester()
7898     .batch_size(2)
7899     .input_size(kStridedInputHeight, kStridedInputWidth)
7900     .kernel_size(2, 2)
7901     .stride(2)
7902     .groups(2)
7903     .group_input_channels(17)
7904     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7905     .iterations(3)
7906     .TestQU8();
7907 }
7908 
7909 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_Kx2sKx2) {
7910   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7911   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
7912     DeconvolutionOperatorTester()
7913       .batch_size(2)
7914       .input_size(kStridedInputHeight, kStridedInputWidth)
7915       .kernel_size(kernel_height, 2)
7916       .stride(kernel_height, 2)
7917       .groups(2)
7918       .group_input_channels(17)
7919       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7920       .iterations(3)
7921       .TestQU8();
7922   }
7923 }
7924 
7925 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2xKs2xK) {
7926   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7927   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
7928     DeconvolutionOperatorTester()
7929       .batch_size(2)
7930       .input_size(kStridedInputHeight, kStridedInputWidth)
7931       .kernel_size(2, kernel_width)
7932       .stride(2, kernel_width)
7933       .groups(2)
7934       .group_input_channels(17)
7935       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7936       .iterations(3)
7937       .TestQU8();
7938   }
7939 }
7940 
7941 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_height_adjustment) {
7942   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7943   DeconvolutionOperatorTester()
7944     .batch_size(2)
7945     .input_size(kStridedInputHeight, kStridedInputWidth)
7946     .adjustment_height(1)
7947     .kernel_size(2, 2)
7948     .stride(2)
7949     .groups(2)
7950     .group_input_channels(17)
7951     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7952     .iterations(1)
7953     .TestQU8();
7954 }
7955 
7956 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_width_adjustment) {
7957   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7958   DeconvolutionOperatorTester()
7959     .batch_size(2)
7960     .input_size(kStridedInputHeight, kStridedInputWidth)
7961     .adjustment_width(1)
7962     .kernel_size(2, 2)
7963     .stride(2)
7964     .groups(2)
7965     .group_input_channels(17)
7966     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7967     .iterations(1)
7968     .TestQU8();
7969 }
7970 
7971 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_input_height) {
7972   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7973   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
7974     DeconvolutionOperatorTester()
7975       .batch_size(2)
7976       .input_size(input_height, kStridedInputWidth)
7977       .kernel_size(2, 2)
7978       .stride(2)
7979       .groups(2)
7980       .group_input_channels(17)
7981       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7982       .iterations(1)
7983       .TestQU8();
7984   }
7985 }
7986 
7987 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_input_width) {
7988   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
7989   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
7990     DeconvolutionOperatorTester()
7991       .batch_size(2)
7992       .input_size(kStridedInputHeight, input_width)
7993       .kernel_size(2, 2)
7994       .stride(2)
7995       .groups(2)
7996       .group_input_channels(17)
7997       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
7998       .iterations(1)
7999       .TestQU8();
8000   }
8001 }
8002 
8003 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_input_channels) {
8004   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8005   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
8006     DeconvolutionOperatorTester()
8007       .batch_size(2)
8008       .input_size(kStridedInputHeight, kStridedInputWidth)
8009       .kernel_size(2, 2)
8010       .stride(2)
8011       .groups(2)
8012       .group_input_channels(input_channels)
8013       .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8014       .iterations(1)
8015       .TestQU8();
8016   }
8017 }
8018 
8019 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_varying_output_channels) {
8020   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8021   for (size_t output_channels = 1; output_channels <= xnn_params.qu8.gemm.nr * 2; output_channels *= 2) {
8022     DeconvolutionOperatorTester()
8023       .batch_size(2)
8024       .input_size(kStridedInputHeight, kStridedInputWidth)
8025       .kernel_size(2, 2)
8026       .stride(2)
8027       .groups(2)
8028       .group_input_channels(17)
8029       .group_output_channels(output_channels)
8030       .iterations(1)
8031       .TestQU8();
8032   }
8033 }
8034 
8035 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_input_stride) {
8036   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8037   DeconvolutionOperatorTester()
8038     .batch_size(2)
8039     .input_size(kStridedInputHeight, kStridedInputWidth)
8040     .kernel_size(2, 2)
8041     .stride(2)
8042     .groups(2)
8043     .group_input_channels(17)
8044     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8045     .input_pixel_stride(37)
8046     .iterations(3)
8047     .TestQU8();
8048 }
8049 
8050 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_output_stride) {
8051   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8052   DeconvolutionOperatorTester()
8053     .batch_size(2)
8054     .input_size(kStridedInputHeight, kStridedInputWidth)
8055     .kernel_size(2, 2)
8056     .stride(2)
8057     .groups(2)
8058     .group_input_channels(17)
8059     .group_output_channels(xnn_params.qu8.gemm.nr + 3)
8060     .output_pixel_stride(xnn_params.qu8.gemm.nr * 2 + 13)
8061     .iterations(3)
8062     .TestQU8();
8063 }
8064 
8065 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_qmin) {
8066   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8067   DeconvolutionOperatorTester()
8068     .batch_size(2)
8069     .input_size(kStridedInputHeight, kStridedInputWidth)
8070     .kernel_size(2, 2)
8071     .stride(2)
8072     .groups(2)
8073     .group_input_channels(17)
8074     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8075     .qmin(128)
8076     .iterations(3)
8077     .TestQU8();
8078 }
8079 
8080 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_with_qmax) {
8081   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8082   DeconvolutionOperatorTester()
8083     .batch_size(2)
8084     .input_size(kStridedInputHeight, kStridedInputWidth)
8085     .kernel_size(2, 2)
8086     .stride(2)
8087     .groups(2)
8088     .group_input_channels(17)
8089     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8090     .qmax(128)
8091     .iterations(3)
8092     .TestQU8();
8093 }
8094 
8095 TEST(DECONVOLUTION_NHWC_QU8, batched_grouped_2x2s2_without_bias) {
8096   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8097   DeconvolutionOperatorTester()
8098     .has_bias(false)
8099     .batch_size(2)
8100     .input_size(kStridedInputHeight, kStridedInputWidth)
8101     .kernel_size(2, 2)
8102     .stride(2)
8103     .groups(2)
8104     .group_input_channels(17)
8105     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8106     .iterations(3)
8107     .TestQU8();
8108 }
8109 
8110 TEST(DECONVOLUTION_NHWC_QU8, weights_cache_batched_grouped_2x2s2) {
8111   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8112   DeconvolutionOperatorTester()
8113     .batch_size(2)
8114     .input_size(kStridedInputHeight, kStridedInputWidth)
8115     .kernel_size(2, 2)
8116     .stride(2)
8117     .groups(2)
8118     .group_input_channels(17)
8119     .group_output_channels(xnn_params.qu8.gemm.nr * 2 + 3)
8120     .use_weights_cache(true)
8121     .iterations(3)
8122     .TestQU8();
8123 }
8124 
8125 /**************************** SUBCONV2D/GEMM path, setup ****************************/
8126 
8127 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_setup_changing_batch) {
8128   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8129   DeconvolutionOperatorTester()
8130     .batch_size(2)
8131     .next_batch_size(5)
8132     .input_size(kStridedInputHeight, kStridedInputWidth)
8133     .kernel_size(2, 2)
8134     .stride(2)
8135     .groups(2)
8136     .group_input_channels(15)
8137     .group_output_channels(17)
8138     .TestSetupQU8();
8139 }
8140 
8141 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_setup_changing_height) {
8142   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8143   DeconvolutionOperatorTester()
8144     .batch_size(2)
8145     .input_size(kStridedInputHeight, kStridedInputWidth)
8146     .next_input_height(kStridedInputHeight + 3)
8147     .kernel_size(2, 2)
8148     .stride(2)
8149     .groups(2)
8150     .group_input_channels(15)
8151     .group_output_channels(17)
8152     .TestSetupQU8();
8153 }
8154 
8155 TEST(DECONVOLUTION_NHWC_QU8, 2x2s2_setup_changing_width) {
8156   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8157   DeconvolutionOperatorTester()
8158     .batch_size(2)
8159     .input_size(kStridedInputHeight, kStridedInputWidth)
8160     .next_input_width(kStridedInputWidth + 3)
8161     .kernel_size(2, 2)
8162     .stride(2)
8163     .groups(2)
8164     .group_input_channels(15)
8165     .group_output_channels(17)
8166     .TestSetupQU8();
8167 }
8168 
8169 /**************************** Future GEMM path ****************************/
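// With a 1x1 kernel and unit stride, deconvolution reduces to a per-pixel matrix multiply, i.e.
// a single GEMM over all pixels, hence the "Future GEMM path" label. The F16 tests below repeat
// the 1x1 sweeps from the QS8/QU8 sections and add variants where the weights are supplied as
// FP32 via weights_type(DeconvolutionOperatorTester::WeightsType::FP32).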
8170 
8171 TEST(DECONVOLUTION_NHWC_F16, 1x1) {
8172   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8173   DeconvolutionOperatorTester()
8174     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8175     .kernel_size(1, 1)
8176     .group_input_channels(23)
8177     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8178     .iterations(3)
8179     .TestF16();
8180 }
8181 
8182 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_fp32_weights) {
8183   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8184   DeconvolutionOperatorTester()
8185     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8186     .kernel_size(1, 1)
8187     .group_input_channels(23)
8188     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8189     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8190     .iterations(3)
8191     .TestF16();
8192 }
8193 
8194 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_input_width) {
8195   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8196   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8197     DeconvolutionOperatorTester()
8198       .input_size(input_height, kUnstridedInputWidth)
8199       .kernel_size(1, 1)
8200       .group_input_channels(23)
8201       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8202       .iterations(1)
8203       .TestF16();
8204   }
8205 }
8206 
8207 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_input_height) {
8208   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8209   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8210     DeconvolutionOperatorTester()
8211       .input_size(kUnstridedInputHeight, input_width)
8212       .kernel_size(1, 1)
8213       .group_input_channels(23)
8214       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8215       .iterations(1)
8216       .TestF16();
8217   }
8218 }
8219 
8220 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_input_channels) {
8221   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8222   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8223     DeconvolutionOperatorTester()
8224       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8225       .kernel_size(1, 1)
8226       .group_input_channels(input_channels)
8227       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8228       .iterations(1)
8229       .TestF16();
8230   }
8231 }
8232 
8233 TEST(DECONVOLUTION_NHWC_F16, 1x1_varying_output_channels) {
8234   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8235   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8236     DeconvolutionOperatorTester()
8237       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8238       .kernel_size(1, 1)
8239       .group_input_channels(23)
8240       .group_output_channels(output_channels)
8241       .iterations(1)
8242       .TestF16();
8243   }
8244 }
8245 
8246 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_input_stride) {
8247   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8248   DeconvolutionOperatorTester()
8249     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8250     .kernel_size(1, 1)
8251     .group_input_channels(23)
8252     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8253     .input_pixel_stride(28)
8254     .iterations(3)
8255     .TestF16();
8256 }
8257 
8258 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_output_stride) {
8259   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8260   DeconvolutionOperatorTester()
8261     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8262     .kernel_size(1, 1)
8263     .group_input_channels(23)
8264     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8265     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8266     .iterations(3)
8267     .TestF16();
8268 }
8269 
8270 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_qmin) {
8271   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8272   DeconvolutionOperatorTester()
8273     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8274     .kernel_size(1, 1)
8275     .group_input_channels(23)
8276     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8277     .qmin(128)
8278     .iterations(3)
8279     .TestF16();
8280 }
8281 
8282 TEST(DECONVOLUTION_NHWC_F16, 1x1_with_qmax) {
8283   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8284   DeconvolutionOperatorTester()
8285     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8286     .kernel_size(1, 1)
8287     .group_input_channels(23)
8288     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8289     .qmax(128)
8290     .iterations(3)
8291     .TestF16();
8292 }
8293 
8294 TEST(DECONVOLUTION_NHWC_F16, 1x1_without_bias) {
8295   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8296   DeconvolutionOperatorTester()
8297     .has_bias(false)
8298     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8299     .kernel_size(1, 1)
8300     .group_input_channels(23)
8301     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8302     .iterations(3)
8303     .TestF16();
8304 }
8305 
8306 /**************************** Future GEMM path, grouped ****************************/
8307 
8308 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1) {
8309   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8310   DeconvolutionOperatorTester()
8311     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8312     .kernel_size(1, 1)
8313     .groups(2)
8314     .group_input_channels(23)
8315     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8316     .iterations(3)
8317     .TestF16();
8318 }
8319 
8320 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_fp32_weights) {
8321   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8322   DeconvolutionOperatorTester()
8323     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8324     .kernel_size(1, 1)
8325     .groups(2)
8326     .group_input_channels(23)
8327     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8328     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8329     .iterations(3)
8330     .TestF16();
8331 }
8332 
8333 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_input_width) {
8334   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8335   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8336     DeconvolutionOperatorTester()
8337       .input_size(input_height, kUnstridedInputWidth)
8338       .kernel_size(1, 1)
8339       .groups(2)
8340       .group_input_channels(23)
8341       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8342       .iterations(1)
8343       .TestF16();
8344   }
8345 }
8346 
8347 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_input_height) {
8348   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8349   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8350     DeconvolutionOperatorTester()
8351       .input_size(kUnstridedInputHeight, input_width)
8352       .kernel_size(1, 1)
8353       .groups(2)
8354       .group_input_channels(23)
8355       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8356       .iterations(1)
8357       .TestF16();
8358   }
8359 }
8360 
8361 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_input_channels) {
8362   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8363   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8364     DeconvolutionOperatorTester()
8365       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8366       .kernel_size(1, 1)
8367       .groups(2)
8368       .group_input_channels(input_channels)
8369       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8370       .iterations(1)
8371       .TestF16();
8372   }
8373 }
8374 
8375 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_varying_output_channels) {
8376   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8377   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8378     DeconvolutionOperatorTester()
8379       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8380       .kernel_size(1, 1)
8381       .groups(2)
8382       .group_input_channels(23)
8383       .group_output_channels(output_channels)
8384       .iterations(1)
8385       .TestF16();
8386   }
8387 }
8388 
8389 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_input_stride) {
8390   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8391   DeconvolutionOperatorTester()
8392     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8393     .kernel_size(1, 1)
8394     .groups(2)
8395     .group_input_channels(23)
8396     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8397     .input_pixel_stride(47)
8398     .iterations(3)
8399     .TestF16();
8400 }
8401 
8402 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_output_stride) {
8403   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8404   DeconvolutionOperatorTester()
8405     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8406     .kernel_size(1, 1)
8407     .groups(2)
8408     .group_input_channels(23)
8409     .group_output_channels(xnn_params.f16.gemm.nr + 3)
8410     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8411     .iterations(3)
8412     .TestF16();
8413 }
8414 
8415 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_qmin) {
8416   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8417   DeconvolutionOperatorTester()
8418     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8419     .kernel_size(1, 1)
8420     .groups(2)
8421     .group_input_channels(23)
8422     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8423     .qmin(128)
8424     .iterations(3)
8425     .TestF16();
8426 }
8427 
8428 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_with_qmax) {
8429   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8430   DeconvolutionOperatorTester()
8431     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8432     .kernel_size(1, 1)
8433     .groups(2)
8434     .group_input_channels(23)
8435     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8436     .qmax(128)
8437     .iterations(3)
8438     .TestF16();
8439 }
8440 
8441 TEST(DECONVOLUTION_NHWC_F16, grouped_1x1_without_bias) {
8442   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8443   DeconvolutionOperatorTester()
8444     .has_bias(false)
8445     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8446     .kernel_size(1, 1)
8447     .groups(2)
8448     .group_input_channels(23)
8449     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8450     .iterations(3)
8451     .TestF16();
8452 }
8453 
8454 /**************************** Future GEMM path, batched ****************************/
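// Same 1x1 cases as above with batch_size(2); batching should only grow the number of
// output rows fed to the underlying matrix multiplication, not change kernel selection.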
8455 
8456 TEST(DECONVOLUTION_NHWC_F16, batched_1x1) {
8457   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8458   DeconvolutionOperatorTester()
8459     .batch_size(2)
8460     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8461     .kernel_size(1, 1)
8462     .group_input_channels(23)
8463     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8464     .iterations(3)
8465     .TestF16();
8466 }
8467 
8468 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_fp32_weights) {
8469   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8470   DeconvolutionOperatorTester()
8471     .batch_size(2)
8472     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8473     .kernel_size(1, 1)
8474     .group_input_channels(23)
8475     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8476     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8477     .iterations(3)
8478     .TestF16();
8479 }
8480 
8481 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_input_width) {
8482   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8483   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8484     DeconvolutionOperatorTester()
8485       .batch_size(2)
8486       .input_size(input_height, kUnstridedInputWidth)
8487       .kernel_size(1, 1)
8488       .group_input_channels(23)
8489       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8490       .iterations(1)
8491       .TestF16();
8492   }
8493 }
8494 
8495 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_input_height) {
8496   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8497   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8498     DeconvolutionOperatorTester()
8499       .batch_size(2)
8500       .input_size(kUnstridedInputHeight, input_width)
8501       .kernel_size(1, 1)
8502       .group_input_channels(23)
8503       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8504       .iterations(1)
8505       .TestF16();
8506   }
8507 }
8508 
8509 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_input_channels) {
8510   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8511   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8512     DeconvolutionOperatorTester()
8513       .batch_size(2)
8514       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8515       .kernel_size(1, 1)
8516       .group_input_channels(input_channels)
8517       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8518       .iterations(1)
8519       .TestF16();
8520   }
8521 }
8522 
8523 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_varying_output_channels) {
8524   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8525   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8526     DeconvolutionOperatorTester()
8527       .batch_size(2)
8528       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8529       .kernel_size(1, 1)
8530       .group_input_channels(23)
8531       .group_output_channels(output_channels)
8532       .iterations(1)
8533       .TestF16();
8534   }
8535 }
8536 
8537 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_input_stride) {
8538   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8539   DeconvolutionOperatorTester()
8540     .batch_size(2)
8541     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8542     .kernel_size(1, 1)
8543     .group_input_channels(23)
8544     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8545     .input_pixel_stride(28)
8546     .iterations(3)
8547     .TestF16();
8548 }
8549 
8550 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_output_stride) {
8551   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8552   DeconvolutionOperatorTester()
8553     .batch_size(2)
8554     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8555     .kernel_size(1, 1)
8556     .group_input_channels(23)
8557     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8558     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8559     .iterations(3)
8560     .TestF16();
8561 }
8562 
8563 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_qmin) {
8564   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8565   DeconvolutionOperatorTester()
8566     .batch_size(2)
8567     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8568     .kernel_size(1, 1)
8569     .group_input_channels(23)
8570     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8571     .qmin(128)
8572     .iterations(3)
8573     .TestF16();
8574 }
8575 
8576 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_with_qmax) {
8577   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8578   DeconvolutionOperatorTester()
8579     .batch_size(2)
8580     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8581     .kernel_size(1, 1)
8582     .group_input_channels(23)
8583     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8584     .qmax(128)
8585     .iterations(3)
8586     .TestF16();
8587 }
8588 
8589 TEST(DECONVOLUTION_NHWC_F16, batched_1x1_without_bias) {
8590   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8591   DeconvolutionOperatorTester()
8592     .has_bias(false)
8593     .batch_size(2)
8594     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8595     .kernel_size(1, 1)
8596     .group_input_channels(23)
8597     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8598     .iterations(3)
8599     .TestF16();
8600 }
8601 
8602 /**************************** Future GEMM path, batched, grouped ****************************/
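// Combines batch_size(2) and groups(2) on the 1x1 case to check that batching and
// grouping compose correctly.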
8603 
8604 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1) {
8605   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8606   DeconvolutionOperatorTester()
8607     .batch_size(2)
8608     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8609     .kernel_size(1, 1)
8610     .groups(2)
8611     .group_input_channels(23)
8612     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8613     .iterations(3)
8614     .TestF16();
8615 }
8616 
8617 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_fp32_weights) {
8618   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8619   DeconvolutionOperatorTester()
8620     .batch_size(2)
8621     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8622     .kernel_size(1, 1)
8623     .groups(2)
8624     .group_input_channels(23)
8625     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8626     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8627     .iterations(3)
8628     .TestF16();
8629 }
8630 
8631 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_input_width) {
8632   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8633   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8634     DeconvolutionOperatorTester()
8635       .batch_size(2)
8636       .input_size(input_height, kUnstridedInputWidth)
8637       .kernel_size(1, 1)
8638       .groups(2)
8639       .group_input_channels(23)
8640       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8641       .iterations(1)
8642       .TestF16();
8643   }
8644 }
8645 
8646 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_input_height) {
8647   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8648   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8649     DeconvolutionOperatorTester()
8650       .batch_size(2)
8651       .input_size(kUnstridedInputHeight, input_width)
8652       .kernel_size(1, 1)
8653       .groups(2)
8654       .group_input_channels(23)
8655       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8656       .iterations(1)
8657       .TestF16();
8658   }
8659 }
8660 
8661 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_input_channels) {
8662   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8663   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8664     DeconvolutionOperatorTester()
8665       .batch_size(2)
8666       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8667       .kernel_size(1, 1)
8668       .groups(2)
8669       .group_input_channels(input_channels)
8670       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8671       .iterations(1)
8672       .TestF16();
8673   }
8674 }
8675 
8676 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_varying_output_channels) {
8677   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8678   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8679     DeconvolutionOperatorTester()
8680       .batch_size(2)
8681       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8682       .kernel_size(1, 1)
8683       .groups(2)
8684       .group_input_channels(23)
8685       .group_output_channels(output_channels)
8686       .iterations(1)
8687       .TestF16();
8688   }
8689 }
8690 
8691 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_input_stride) {
8692   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8693   DeconvolutionOperatorTester()
8694     .batch_size(2)
8695     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8696     .kernel_size(1, 1)
8697     .groups(2)
8698     .group_input_channels(23)
8699     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8700     .input_pixel_stride(47)
8701     .iterations(3)
8702     .TestF16();
8703 }
8704 
8705 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_output_stride) {
8706   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8707   DeconvolutionOperatorTester()
8708     .batch_size(2)
8709     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8710     .kernel_size(1, 1)
8711     .groups(2)
8712     .group_input_channels(23)
8713     .group_output_channels(xnn_params.f16.gemm.nr + 3)
8714     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
8715     .iterations(3)
8716     .TestF16();
8717 }
8718 
8719 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_qmin) {
8720   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8721   DeconvolutionOperatorTester()
8722     .batch_size(2)
8723     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8724     .kernel_size(1, 1)
8725     .groups(2)
8726     .group_input_channels(23)
8727     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8728     .qmin(128)
8729     .iterations(3)
8730     .TestF16();
8731 }
8732 
8733 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_with_qmax) {
8734   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8735   DeconvolutionOperatorTester()
8736     .batch_size(2)
8737     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8738     .kernel_size(1, 1)
8739     .groups(2)
8740     .group_input_channels(23)
8741     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8742     .qmax(128)
8743     .iterations(3)
8744     .TestF16();
8745 }
8746 
8747 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_1x1_without_bias) {
8748   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8749   DeconvolutionOperatorTester()
8750     .has_bias(false)
8751     .batch_size(2)
8752     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8753     .kernel_size(1, 1)
8754     .groups(2)
8755     .group_input_channels(23)
8756     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8757     .iterations(3)
8758     .TestF16();
8759 }
8760 
8761 /**************************** CONV path ****************************/
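// 3x3 kernels with unit stride: these presumably run through the convolution-style
// (indirection + GEMM) path, so the variants below add padding, output-size adjustment,
// and dilation on top of the channel/size sweeps used for the 1x1 tests.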
8762 
8763 TEST(DECONVOLUTION_NHWC_F16, 3x3) {
8764   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8765   DeconvolutionOperatorTester()
8766     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8767     .padding(1)
8768     .kernel_size(3, 3)
8769     .group_input_channels(15)
8770     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8771     .iterations(3)
8772     .TestF16();
8773 }
8774 
8775 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_fp32_weights) {
8776   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8777   DeconvolutionOperatorTester()
8778     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8779     .padding(1)
8780     .kernel_size(3, 3)
8781     .group_input_channels(15)
8782     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8783     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
8784     .iterations(3)
8785     .TestF16();
8786 }
8787 
8788 TEST(DECONVOLUTION_NHWC_F16, Kx3) {
8789   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8790   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
8791     DeconvolutionOperatorTester()
8792       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8793       .padding_width(1)
8794       .kernel_size(kernel_height, 3)
8795       .group_input_channels(17)
8796       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8797       .iterations(3)
8798       .TestF16();
8799   }
8800 }
8801 
8802 TEST(DECONVOLUTION_NHWC_F16, 3xK) {
8803   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8804   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
8805     DeconvolutionOperatorTester()
8806       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8807       .padding_height(1)
8808       .kernel_size(3, kernel_width)
8809       .group_input_channels(17)
8810       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8811       .iterations(3)
8812       .TestF16();
8813   }
8814 }
8815 
8816 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_height_padding) {
8817   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8818   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
8819     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
8820       DeconvolutionOperatorTester()
8821         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8822         .padding_width(1)
8823         .padding_top(padding_top)
8824         .padding_bottom(padding_bottom)
8825         .kernel_size(3, 3)
8826         .group_input_channels(15)
8827         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8828         .iterations(1)
8829         .TestF16();
8830     }
8831   }
8832 }
8833 
8834 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_width_padding) {
8835   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8836   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
8837     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
8838       DeconvolutionOperatorTester()
8839         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8840         .padding_height(1)
8841         .padding_left(padding_left)
8842         .padding_right(padding_right)
8843         .kernel_size(3, 3)
8844         .group_input_channels(15)
8845         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8846         .iterations(1)
8847         .TestF16();
8848     }
8849   }
8850 }
8851 
8852 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_height_adjustment) {
8853   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8854   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
8855     DeconvolutionOperatorTester()
8856       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8857       .padding(1)
8858       .stride_height(adjustment_height + 1)
8859       .adjustment_height(adjustment_height)
8860       .kernel_size(3, 3)
8861       .group_input_channels(15)
8862       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8863       .iterations(1)
8864       .TestF16();
8865   }
8866 }
8867 
8868 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_width_adjustment) {
8869   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8870   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
8871     DeconvolutionOperatorTester()
8872       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8873       .padding(1)
8874       .stride_width(adjustment_width + 1)
8875       .adjustment_width(adjustment_width)
8876       .kernel_size(3, 3)
8877       .group_input_channels(15)
8878       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8879       .iterations(1)
8880       .TestF16();
8881   }
8882 }
8883 
8884 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_input_height) {
8885   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8886   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
8887     DeconvolutionOperatorTester()
8888       .input_size(input_height, kUnstridedInputWidth)
8889       .padding(1)
8890       .kernel_size(3, 3)
8891       .group_input_channels(15)
8892       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8893       .iterations(1)
8894       .TestF16();
8895   }
8896 }
8897 
8898 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_input_width) {
8899   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8900   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
8901     DeconvolutionOperatorTester()
8902       .input_size(kUnstridedInputHeight, input_width)
8903       .padding(1)
8904       .kernel_size(3, 3)
8905       .group_input_channels(15)
8906       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8907       .iterations(1)
8908       .TestF16();
8909   }
8910 }
8911 
8912 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_input_channels) {
8913   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8914   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
8915     DeconvolutionOperatorTester()
8916       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8917       .padding(1)
8918       .kernel_size(3, 3)
8919       .group_input_channels(input_channels)
8920       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8921       .iterations(1)
8922       .TestF16();
8923   }
8924 }
8925 
8926 TEST(DECONVOLUTION_NHWC_F16, 3x3_varying_output_channels) {
8927   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8928   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
8929     DeconvolutionOperatorTester()
8930       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8931       .padding(1)
8932       .kernel_size(3, 3)
8933       .group_input_channels(23)
8934       .group_output_channels(output_channels)
8935       .iterations(1)
8936       .TestF16();
8937   }
8938 }
8939 
8940 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_height_dilation) {
8941   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8942   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
8943     DeconvolutionOperatorTester()
8944       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8945       .padding(1)
8946       .kernel_size(3, 3)
8947       .dilation_height(dilation_height)
8948       .group_input_channels(23)
8949       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8950       .iterations(3)
8951       .TestF16();
8952   }
8953 }
8954 
8955 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_width_dilation) {
8956   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8957   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
8958     DeconvolutionOperatorTester()
8959       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8960       .padding(1)
8961       .kernel_size(3, 3)
8962       .dilation_width(dilation_width)
8963       .group_input_channels(23)
8964       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8965       .iterations(3)
8966       .TestF16();
8967   }
8968 }
8969 
8970 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_height_dilation_and_stride) {
8971   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8972   DeconvolutionOperatorTester()
8973     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8974     .padding(1)
8975     .kernel_size(3, 3)
8976     .dilation_height(3)
8977     .stride_height(2)
8978     .group_input_channels(23)
8979     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8980     .iterations(3)
8981     .TestF16();
8982 }
8983 
8984 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_width_dilation_and_stride) {
8985   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
8986   DeconvolutionOperatorTester()
8987     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
8988     .padding(1)
8989     .kernel_size(3, 3)
8990     .dilation_width(3)
8991     .stride_width(2)
8992     .group_input_channels(23)
8993     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
8994     .iterations(3)
8995     .TestF16();
8996 }
8997 
8998 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_input_stride) {
8999   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9000   DeconvolutionOperatorTester()
9001     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9002     .padding(1)
9003     .kernel_size(3, 3)
9004     .group_input_channels(23)
9005     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9006     .input_pixel_stride(28)
9007     .iterations(3)
9008     .TestF16();
9009 }
9010 
9011 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_output_stride) {
9012   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9013   DeconvolutionOperatorTester()
9014     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9015     .padding(1)
9016     .kernel_size(3, 3)
9017     .group_input_channels(23)
9018     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9019     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
9020     .iterations(3)
9021     .TestF16();
9022 }
9023 
9024 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_qmin) {
9025   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9026   DeconvolutionOperatorTester()
9027     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9028     .padding(1)
9029     .kernel_size(3, 3)
9030     .group_input_channels(23)
9031     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9032     .qmin(128)
9033     .iterations(3)
9034     .TestF16();
9035 }
9036 
9037 TEST(DECONVOLUTION_NHWC_F16, 3x3_with_qmax) {
9038   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9039   DeconvolutionOperatorTester()
9040     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9041     .padding(1)
9042     .kernel_size(3, 3)
9043     .group_input_channels(23)
9044     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9045     .qmax(128)
9046     .iterations(3)
9047     .TestF16();
9048 }
9049 
9050 TEST(DECONVOLUTION_NHWC_F16, 3x3_without_bias) {
9051   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9052   DeconvolutionOperatorTester()
9053     .has_bias(false)
9054     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9055     .padding(1)
9056     .kernel_size(3, 3)
9057     .group_input_channels(23)
9058     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9059     .iterations(3)
9060     .TestF16();
9061 }
9062 
9063 TEST(DECONVOLUTION_NHWC_F16, weights_cache_3x3) {
9064   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9065   DeconvolutionOperatorTester()
9066     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9067     .padding(1)
9068     .kernel_size(3, 3)
9069     .group_input_channels(15)
9070     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9071     .use_weights_cache(true)
9072     .iterations(3)
9073     .TestF16();
9074 }
9075 
9076 /**************************** CONV path, grouped ****************************/
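// Grouped (groups(2)) variants of the 3x3 CONV-path tests; per-group channel counts match
// the ungrouped cases above.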
9077 
9078 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3) {
9079   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9080   DeconvolutionOperatorTester()
9081     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9082     .padding(1)
9083     .kernel_size(3, 3)
9084     .groups(2)
9085     .group_input_channels(15)
9086     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9087     .iterations(3)
9088     .TestF16();
9089 }
9090 
9091 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_fp32_weights) {
9092   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9093   DeconvolutionOperatorTester()
9094     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9095     .padding(1)
9096     .kernel_size(3, 3)
9097     .groups(2)
9098     .group_input_channels(15)
9099     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9100     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
9101     .iterations(3)
9102     .TestF16();
9103 }
9104 
9105 TEST(DECONVOLUTION_NHWC_F16, grouped_Kx3) {
9106   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9107   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
9108     DeconvolutionOperatorTester()
9109       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9110       .padding_width(1)
9111       .kernel_size(kernel_height, 3)
9112       .groups(2)
9113       .group_input_channels(17)
9114       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9115       .iterations(3)
9116       .TestF16();
9117   }
9118 }
9119 
9120 TEST(DECONVOLUTION_NHWC_F16, grouped_3xK) {
9121   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9122   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
9123     DeconvolutionOperatorTester()
9124       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9125       .padding_height(1)
9126       .kernel_size(3, kernel_width)
9127       .groups(2)
9128       .group_input_channels(17)
9129       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9130       .iterations(3)
9131       .TestF16();
9132   }
9133 }
9134 
9135 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_height_padding) {
9136   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9137   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
9138     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
9139       DeconvolutionOperatorTester()
9140         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9141         .padding_width(1)
9142         .padding_top(padding_top)
9143         .padding_bottom(padding_bottom)
9144         .kernel_size(3, 3)
9145         .groups(2)
9146         .group_input_channels(15)
9147         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9148         .iterations(1)
9149         .TestF16();
9150     }
9151   }
9152 }
9153 
9154 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_width_padding) {
9155   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9156   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
9157     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
9158       DeconvolutionOperatorTester()
9159         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9160         .padding_height(1)
9161         .padding_left(padding_left)
9162         .padding_right(padding_right)
9163         .kernel_size(3, 3)
9164         .groups(2)
9165         .group_input_channels(15)
9166         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9167         .iterations(1)
9168         .TestF16();
9169     }
9170   }
9171 }
9172 
9173 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_height_adjustment) {
9174   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9175   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
9176     DeconvolutionOperatorTester()
9177       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9178       .padding(1)
9179       .stride_height(adjustment_height + 1)
9180       .adjustment_height(adjustment_height)
9181       .kernel_size(3, 3)
9182       .groups(2)
9183       .group_input_channels(15)
9184       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9185       .iterations(1)
9186       .TestF16();
9187   }
9188 }
9189 
9190 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_width_adjustment) {
9191   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9192   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
9193     DeconvolutionOperatorTester()
9194       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9195       .padding(1)
9196       .stride_width(adjustment_width + 1)
9197       .adjustment_width(adjustment_width)
9198       .kernel_size(3, 3)
9199       .groups(2)
9200       .group_input_channels(15)
9201       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9202       .iterations(1)
9203       .TestF16();
9204   }
9205 }
9206 
9207 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_input_height) {
9208   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9209   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
9210     DeconvolutionOperatorTester()
9211       .input_size(input_height, kUnstridedInputWidth)
9212       .padding(1)
9213       .kernel_size(3, 3)
9214       .groups(2)
9215       .group_input_channels(15)
9216       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9217       .iterations(1)
9218       .TestF16();
9219   }
9220 }
9221 
9222 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_input_width) {
9223   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9224   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
9225     DeconvolutionOperatorTester()
9226       .input_size(kUnstridedInputHeight, input_width)
9227       .padding(1)
9228       .kernel_size(3, 3)
9229       .groups(2)
9230       .group_input_channels(15)
9231       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9232       .iterations(1)
9233       .TestF16();
9234   }
9235 }
9236 
9237 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_input_channels) {
9238   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9239   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
9240     DeconvolutionOperatorTester()
9241       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9242       .padding(1)
9243       .kernel_size(3, 3)
9244       .groups(2)
9245       .group_input_channels(input_channels)
9246       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9247       .iterations(1)
9248       .TestF16();
9249   }
9250 }
9251 
9252 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_varying_output_channels) {
9253   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9254   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
9255     DeconvolutionOperatorTester()
9256       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9257       .padding(1)
9258       .kernel_size(3, 3)
9259       .groups(2)
9260       .group_input_channels(23)
9261       .group_output_channels(output_channels)
9262       .iterations(1)
9263       .TestF16();
9264   }
9265 }
9266 
9267 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_height_dilation) {
9268   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9269   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
9270     DeconvolutionOperatorTester()
9271       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9272       .padding(1)
9273       .kernel_size(3, 3)
9274       .dilation_height(dilation_height)
9275       .groups(2)
9276       .group_input_channels(23)
9277       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9278       .iterations(3)
9279       .TestF16();
9280   }
9281 }
9282 
9283 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_width_dilation) {
9284   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9285   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
9286     DeconvolutionOperatorTester()
9287       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9288       .padding(1)
9289       .kernel_size(3, 3)
9290       .dilation_width(dilation_width)
9291       .groups(2)
9292       .group_input_channels(23)
9293       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9294       .iterations(3)
9295       .TestF16();
9296   }
9297 }
9298 
9299 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_height_dilation_and_stride) {
9300   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9301   DeconvolutionOperatorTester()
9302     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9303     .padding(1)
9304     .kernel_size(3, 3)
9305     .dilation_height(3)
9306     .stride_height(2)
9307     .groups(2)
9308     .group_input_channels(23)
9309     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9310     .iterations(3)
9311     .TestF16();
9312 }
9313 
9314 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_width_dilation_and_stride) {
9315   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9316   DeconvolutionOperatorTester()
9317     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9318     .padding(1)
9319     .kernel_size(3, 3)
9320     .dilation_width(3)
9321     .stride_width(2)
9322     .groups(2)
9323     .group_input_channels(23)
9324     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9325     .iterations(3)
9326     .TestF16();
9327 }
9328 
9329 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_input_stride) {
9330   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9331   DeconvolutionOperatorTester()
9332     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9333     .padding(1)
9334     .kernel_size(3, 3)
9335     .groups(2)
9336     .group_input_channels(23)
9337     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9338     .input_pixel_stride(47)
9339     .iterations(3)
9340     .TestF16();
9341 }
9342 
9343 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_output_stride) {
9344   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9345   DeconvolutionOperatorTester()
9346     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9347     .padding(1)
9348     .kernel_size(3, 3)
9349     .groups(2)
9350     .group_input_channels(23)
9351     .group_output_channels(xnn_params.f16.gemm.nr + 3)
9352     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
9353     .iterations(3)
9354     .TestF16();
9355 }
9356 
9357 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_qmin) {
9358   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9359   DeconvolutionOperatorTester()
9360     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9361     .padding(1)
9362     .kernel_size(3, 3)
9363     .groups(2)
9364     .group_input_channels(23)
9365     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9366     .qmin(128)
9367     .iterations(3)
9368     .TestF16();
9369 }
9370 
9371 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_with_qmax) {
9372   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9373   DeconvolutionOperatorTester()
9374     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9375     .padding(1)
9376     .kernel_size(3, 3)
9377     .groups(2)
9378     .group_input_channels(23)
9379     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9380     .qmax(128)
9381     .iterations(3)
9382     .TestF16();
9383 }
9384 
9385 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3_without_bias) {
9386   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9387   DeconvolutionOperatorTester()
9388     .has_bias(false)
9389     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9390     .padding(1)
9391     .kernel_size(3, 3)
9392     .groups(2)
9393     .group_input_channels(23)
9394     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9395     .iterations(3)
9396     .TestF16();
9397 }
9398 
9399 TEST(DECONVOLUTION_NHWC_F16, weights_cache_grouped_3x3) {
9400   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9401   DeconvolutionOperatorTester()
9402     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9403     .padding(1)
9404     .kernel_size(3, 3)
9405     .groups(2)
9406     .group_input_channels(15)
9407     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9408     .use_weights_cache(true)
9409     .iterations(3)
9410     .TestF16();
9411 }
9412 
9413 /**************************** CONV path, batched ****************************/
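// Batched (batch_size(2)) variants of the 3x3 CONV-path tests.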
9414 
9415 TEST(DECONVOLUTION_NHWC_F16, batched_3x3) {
9416   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9417   DeconvolutionOperatorTester()
9418     .batch_size(2)
9419     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9420     .padding(1)
9421     .kernel_size(3, 3)
9422     .group_input_channels(15)
9423     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9424     .iterations(3)
9425     .TestF16();
9426 }
9427 
9428 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_fp32_weights) {
9429   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9430   DeconvolutionOperatorTester()
9431     .batch_size(2)
9432     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9433     .padding(1)
9434     .kernel_size(3, 3)
9435     .group_input_channels(15)
9436     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9437     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
9438     .iterations(3)
9439     .TestF16();
9440 }
9441 
9442 TEST(DECONVOLUTION_NHWC_F16, batched_Kx3) {
9443   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9444   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
9445     DeconvolutionOperatorTester()
9446       .batch_size(2)
9447       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9448       .padding_width(1)
9449       .kernel_size(kernel_height, 3)
9450       .group_input_channels(17)
9451       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9452       .iterations(3)
9453       .TestF16();
9454   }
9455 }
9456 
9457 TEST(DECONVOLUTION_NHWC_F16, batched_3xK) {
9458   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9459   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
9460     DeconvolutionOperatorTester()
9461       .batch_size(2)
9462       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9463       .padding_height(1)
9464       .kernel_size(3, kernel_width)
9465       .group_input_channels(17)
9466       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9467       .iterations(3)
9468       .TestF16();
9469   }
9470 }
9471 
9472 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_height_padding) {
9473   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9474   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
9475     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
9476       DeconvolutionOperatorTester()
9477         .batch_size(2)
9478         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9479         .padding_width(1)
9480         .padding_top(padding_top)
9481         .padding_bottom(padding_bottom)
9482         .kernel_size(3, 3)
9483         .group_input_channels(15)
9484         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9485         .iterations(1)
9486         .TestF16();
9487     }
9488   }
9489 }
9490 
9491 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_width_padding) {
9492   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9493   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
9494     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
9495       DeconvolutionOperatorTester()
9496         .batch_size(2)
9497         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9498         .padding_height(1)
9499         .padding_left(padding_left)
9500         .padding_right(padding_right)
9501         .kernel_size(3, 3)
9502         .group_input_channels(15)
9503         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9504         .iterations(1)
9505         .TestF16();
9506     }
9507   }
9508 }
9509 
9510 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_height_adjustment) {
9511   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9512   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
9513     DeconvolutionOperatorTester()
9514       .batch_size(2)
9515       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9516       .padding(1)
9517       .stride_height(adjustment_height + 1)
9518       .adjustment_height(adjustment_height)
9519       .kernel_size(3, 3)
9520       .group_input_channels(15)
9521       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9522       .iterations(1)
9523       .TestF16();
9524   }
9525 }
9526 
9527 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_width_adjustment) {
9528   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9529   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
9530     DeconvolutionOperatorTester()
9531       .batch_size(2)
9532       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9533       .padding(1)
9534       .stride_width(adjustment_width + 1)
9535       .adjustment_width(adjustment_width)
9536       .kernel_size(3, 3)
9537       .group_input_channels(15)
9538       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9539       .iterations(1)
9540       .TestF16();
9541   }
9542 }
9543 
9544 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_input_height) {
9545   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9546   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
9547     DeconvolutionOperatorTester()
9548       .batch_size(2)
9549       .input_size(input_height, kUnstridedInputWidth)
9550       .padding(1)
9551       .kernel_size(3, 3)
9552       .group_input_channels(15)
9553       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9554       .iterations(1)
9555       .TestF16();
9556   }
9557 }
9558 
9559 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_input_width) {
9560   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9561   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
9562     DeconvolutionOperatorTester()
9563       .batch_size(2)
9564       .input_size(kUnstridedInputHeight, input_width)
9565       .padding(1)
9566       .kernel_size(3, 3)
9567       .group_input_channels(15)
9568       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9569       .iterations(1)
9570       .TestF16();
9571   }
9572 }
9573 
9574 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_input_channels) {
9575   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9576   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
9577     DeconvolutionOperatorTester()
9578       .batch_size(2)
9579       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9580       .padding(1)
9581       .kernel_size(3, 3)
9582       .group_input_channels(input_channels)
9583       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9584       .iterations(1)
9585       .TestF16();
9586   }
9587 }
9588 
9589 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_varying_output_channels) {
9590   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9591   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
9592     DeconvolutionOperatorTester()
9593       .batch_size(2)
9594       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9595       .padding(1)
9596       .kernel_size(3, 3)
9597       .group_input_channels(23)
9598       .group_output_channels(output_channels)
9599       .iterations(1)
9600       .TestF16();
9601   }
9602 }
9603 
9604 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_height_dilation) {
9605   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9606   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
9607     DeconvolutionOperatorTester()
9608       .batch_size(2)
9609       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9610       .padding(1)
9611       .kernel_size(3, 3)
9612       .dilation_height(dilation_height)
9613       .group_input_channels(23)
9614       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9615       .iterations(3)
9616       .TestF16();
9617   }
9618 }
9619 
9620 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_width_dilation) {
9621   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9622   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
9623     DeconvolutionOperatorTester()
9624       .batch_size(2)
9625       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9626       .padding(1)
9627       .kernel_size(3, 3)
9628       .dilation_width(dilation_width)
9629       .group_input_channels(23)
9630       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9631       .iterations(3)
9632       .TestF16();
9633   }
9634 }
9635 
9636 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_height_dilation_and_stride) {
9637   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9638   DeconvolutionOperatorTester()
9639     .batch_size(2)
9640     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9641     .padding(1)
9642     .kernel_size(3, 3)
9643     .dilation_height(3)
9644     .stride_height(2)
9645     .group_input_channels(23)
9646     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9647     .iterations(3)
9648     .TestF16();
9649 }
9650 
9651 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_width_dilation_and_stride) {
9652   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9653   DeconvolutionOperatorTester()
9654     .batch_size(2)
9655     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9656     .padding(1)
9657     .kernel_size(3, 3)
9658     .dilation_width(3)
9659     .stride_width(2)
9660     .group_input_channels(23)
9661     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9662     .iterations(3)
9663     .TestF16();
9664 }
9665 
9666 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_input_stride) {
9667   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9668   DeconvolutionOperatorTester()
9669     .batch_size(2)
9670     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9671     .padding(1)
9672     .kernel_size(3, 3)
9673     .group_input_channels(23)
9674     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9675     .input_pixel_stride(28)
9676     .iterations(3)
9677     .TestF16();
9678 }
9679 
9680 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_output_stride) {
9681   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9682   DeconvolutionOperatorTester()
9683     .batch_size(2)
9684     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9685     .padding(1)
9686     .kernel_size(3, 3)
9687     .group_input_channels(23)
9688     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9689     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
9690     .iterations(3)
9691     .TestF16();
9692 }
9693 
9694 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_qmin) {
9695   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9696   DeconvolutionOperatorTester()
9697     .batch_size(2)
9698     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9699     .padding(1)
9700     .kernel_size(3, 3)
9701     .group_input_channels(23)
9702     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9703     .qmin(128)
9704     .iterations(3)
9705     .TestF16();
9706 }
9707 
9708 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_with_qmax) {
9709   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9710   DeconvolutionOperatorTester()
9711     .batch_size(2)
9712     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9713     .padding(1)
9714     .kernel_size(3, 3)
9715     .group_input_channels(23)
9716     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9717     .qmax(128)
9718     .iterations(3)
9719     .TestF16();
9720 }
9721 
9722 TEST(DECONVOLUTION_NHWC_F16, batched_3x3_without_bias) {
9723   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9724   DeconvolutionOperatorTester()
9725     .has_bias(false)
9726     .batch_size(2)
9727     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9728     .padding(1)
9729     .kernel_size(3, 3)
9730     .group_input_channels(23)
9731     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9732     .iterations(3)
9733     .TestF16();
9734 }
9735 
9736 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_3x3) {
9737   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9738   DeconvolutionOperatorTester()
9739     .batch_size(2)
9740     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9741     .padding(1)
9742     .kernel_size(3, 3)
9743     .group_input_channels(15)
9744     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9745     .use_weights_cache(true)
9746     .iterations(3)
9747     .TestF16();
9748 }
9749 
9750 /**************************** CONV path, grouped, batched ****************************/
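// Note (added for clarity): the tests below combine batch_size(2) with groups(2) at unit
// stride; with stride 1 the deconvolution is expected to take the CONV (GEMM/IGEMM) path
// rather than the strided SUBCONV2D path covered later in this file.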
9751 
9752 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3) {
9753   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9754   DeconvolutionOperatorTester()
9755     .batch_size(2)
9756     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9757     .padding(1)
9758     .kernel_size(3, 3)
9759     .groups(2)
9760     .group_input_channels(15)
9761     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9762     .iterations(3)
9763     .TestF16();
9764 }
9765 
9766 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_fp32_weights) {
9767   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9768   DeconvolutionOperatorTester()
9769     .batch_size(2)
9770     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9771     .padding(1)
9772     .kernel_size(3, 3)
9773     .groups(2)
9774     .group_input_channels(15)
9775     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9776     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
9777     .iterations(3)
9778     .TestF16();
9779 }
9780 
9781 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_Kx3) {
9782   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9783   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
9784     DeconvolutionOperatorTester()
9785       .batch_size(2)
9786       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9787       .padding_width(1)
9788       .kernel_size(kernel_height, 3)
9789       .groups(2)
9790       .group_input_channels(17)
9791       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9792       .iterations(3)
9793       .TestF16();
9794   }
9795 }
9796 
9797 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3xK) {
9798   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9799   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
9800     DeconvolutionOperatorTester()
9801       .batch_size(2)
9802       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9803       .padding_height(1)
9804       .kernel_size(3, kernel_width)
9805       .groups(2)
9806       .group_input_channels(17)
9807       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9808       .iterations(3)
9809       .TestF16();
9810   }
9811 }
9812 
9813 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_height_padding) {
9814   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9815   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
9816     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
9817       DeconvolutionOperatorTester()
9818         .batch_size(2)
9819         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9820         .padding_width(1)
9821         .padding_top(padding_top)
9822         .padding_bottom(padding_bottom)
9823         .kernel_size(3, 3)
9824         .groups(2)
9825         .group_input_channels(15)
9826         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9827         .iterations(1)
9828         .TestF16();
9829     }
9830   }
9831 }
9832 
9833 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_width_padding) {
9834   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9835   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
9836     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
9837       DeconvolutionOperatorTester()
9838         .batch_size(2)
9839         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9840         .padding_height(1)
9841         .padding_left(padding_left)
9842         .padding_right(padding_right)
9843         .kernel_size(3, 3)
9844         .groups(2)
9845         .group_input_channels(15)
9846         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9847         .iterations(1)
9848         .TestF16();
9849     }
9850   }
9851 }
9852 
9853 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_height_adjustment) {
9854   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9855   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
9856     DeconvolutionOperatorTester()
9857       .batch_size(2)
9858       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9859       .padding(1)
9860       .stride_height(adjustment_height + 1)
9861       .adjustment_height(adjustment_height)
9862       .kernel_size(3, 3)
9863       .groups(2)
9864       .group_input_channels(15)
9865       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9866       .iterations(1)
9867       .TestF16();
9868   }
9869 }
9870 
9871 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_width_adjustment) {
9872   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9873   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
9874     DeconvolutionOperatorTester()
9875       .batch_size(2)
9876       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9877       .padding(1)
9878       .stride_width(adjustment_width + 1)
9879       .adjustment_width(adjustment_width)
9880       .kernel_size(3, 3)
9881       .groups(2)
9882       .group_input_channels(15)
9883       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9884       .iterations(1)
9885       .TestF16();
9886   }
9887 }
9888 
9889 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_input_height) {
9890   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9891   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
9892     DeconvolutionOperatorTester()
9893       .batch_size(2)
9894       .input_size(input_height, kUnstridedInputWidth)
9895       .padding(1)
9896       .kernel_size(3, 3)
9897       .groups(2)
9898       .group_input_channels(15)
9899       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9900       .iterations(1)
9901       .TestF16();
9902   }
9903 }
9904 
9905 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_input_width) {
9906   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9907   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
9908     DeconvolutionOperatorTester()
9909       .batch_size(2)
9910       .input_size(kUnstridedInputHeight, input_width)
9911       .padding(1)
9912       .kernel_size(3, 3)
9913       .groups(2)
9914       .group_input_channels(15)
9915       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9916       .iterations(1)
9917       .TestF16();
9918   }
9919 }
9920 
9921 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_input_channels) {
9922   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9923   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
9924     DeconvolutionOperatorTester()
9925       .batch_size(2)
9926       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9927       .padding(1)
9928       .kernel_size(3, 3)
9929       .groups(2)
9930       .group_input_channels(input_channels)
9931       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9932       .iterations(1)
9933       .TestF16();
9934   }
9935 }
9936 
9937 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_varying_output_channels) {
9938   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9939   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
9940     DeconvolutionOperatorTester()
9941       .batch_size(2)
9942       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9943       .padding(1)
9944       .kernel_size(3, 3)
9945       .groups(2)
9946       .group_input_channels(23)
9947       .group_output_channels(output_channels)
9948       .iterations(1)
9949       .TestF16();
9950   }
9951 }
9952 
9953 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_height_dilation) {
9954   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9955   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
9956     DeconvolutionOperatorTester()
9957       .batch_size(2)
9958       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9959       .padding(1)
9960       .kernel_size(3, 3)
9961       .dilation_height(dilation_height)
9962       .groups(2)
9963       .group_input_channels(23)
9964       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9965       .iterations(3)
9966       .TestF16();
9967   }
9968 }
9969 
9970 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_width_dilation) {
9971   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9972   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
9973     DeconvolutionOperatorTester()
9974       .batch_size(2)
9975       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9976       .padding(1)
9977       .kernel_size(3, 3)
9978       .dilation_width(dilation_width)
9979       .groups(2)
9980       .group_input_channels(23)
9981       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9982       .iterations(3)
9983       .TestF16();
9984   }
9985 }
9986 
9987 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_height_dilation_and_stride) {
9988   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
9989   DeconvolutionOperatorTester()
9990     .batch_size(2)
9991     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
9992     .padding(1)
9993     .kernel_size(3, 3)
9994     .dilation_height(3)
9995     .stride_width(2)
9996     .groups(2)
9997     .group_input_channels(23)
9998     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
9999     .iterations(3)
10000     .TestF16();
10001 }
10002 
10003 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_width_dilation_and_stride) {
10004   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10005   DeconvolutionOperatorTester()
10006     .batch_size(2)
10007     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10008     .padding(1)
10009     .kernel_size(3, 3)
10010     .dilation_width(3)
10011     .stride_width(2)
10012     .groups(2)
10013     .group_input_channels(23)
10014     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10015     .iterations(3)
10016     .TestF16();
10017 }
10018 
10019 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_input_stride) {
10020   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10021   DeconvolutionOperatorTester()
10022     .batch_size(2)
10023     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10024     .padding(1)
10025     .kernel_size(3, 3)
10026     .groups(2)
10027     .group_input_channels(23)
10028     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10029     .input_pixel_stride(47)
10030     .iterations(3)
10031     .TestF16();
10032 }
10033 
10034 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_output_stride) {
10035   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10036   DeconvolutionOperatorTester()
10037     .batch_size(2)
10038     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10039     .padding(1)
10040     .kernel_size(3, 3)
10041     .groups(2)
10042     .group_input_channels(23)
10043     .group_output_channels(xnn_params.f16.gemm.nr + 3)
10044     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
10045     .iterations(3)
10046     .TestF16();
10047 }
10048 
10049 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_qmin) {
10050   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10051   DeconvolutionOperatorTester()
10052     .batch_size(2)
10053     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10054     .padding(1)
10055     .kernel_size(3, 3)
10056     .groups(2)
10057     .group_input_channels(23)
10058     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10059     .qmin(128)
10060     .iterations(3)
10061     .TestF16();
10062 }
10063 
10064 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_with_qmax) {
10065   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10066   DeconvolutionOperatorTester()
10067     .batch_size(2)
10068     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10069     .padding(1)
10070     .kernel_size(3, 3)
10071     .groups(2)
10072     .group_input_channels(23)
10073     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10074     .qmax(128)
10075     .iterations(3)
10076     .TestF16();
10077 }
10078 
10079 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3_without_bias) {
10080   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10081   DeconvolutionOperatorTester()
10082     .has_bias(false)
10083     .batch_size(2)
10084     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10085     .padding(1)
10086     .kernel_size(3, 3)
10087     .groups(2)
10088     .group_input_channels(23)
10089     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10090     .iterations(3)
10091     .TestF16();
10092 }
10093 
10094 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_grouped_3x3) {
10095   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10096   DeconvolutionOperatorTester()
10097     .batch_size(2)
10098     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10099     .padding(1)
10100     .kernel_size(3, 3)
10101     .groups(2)
10102     .group_input_channels(15)
10103     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10104     .use_weights_cache(true)
10105     .iterations(3)
10106     .TestF16();
10107 }
10108 
10109 /**************************** CONV path, setup ****************************/
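// Note (added for clarity): the *_setup_* tests below call TestSetupF16(), which runs the
// same operator on a second shape (changed via next_batch_size / next_input_height /
// next_input_width) to verify that re-setup of an existing operator still produces
// correct results.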
10110 
10111 TEST(DECONVOLUTION_NHWC_F16, 3x3_setup_changing_batch) {
10112   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10113   DeconvolutionOperatorTester()
10114     .batch_size(2)
10115     .next_batch_size(5)
10116     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10117     .kernel_height(3)
10118     .kernel_width(5)
10119     .groups(2)
10120     .group_input_channels(15)
10121     .group_output_channels(17)
10122     .TestSetupF16();
10123 }
10124 
10125 TEST(DECONVOLUTION_NHWC_F16, 3x3_setup_changing_height) {
10126   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10127   DeconvolutionOperatorTester()
10128     .batch_size(2)
10129     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10130     .next_input_height(kUnstridedInputHeight + 3)
10131     .kernel_height(3)
10132     .kernel_width(5)
10133     .groups(2)
10134     .group_input_channels(15)
10135     .group_output_channels(17)
10136     .TestSetupF16();
10137 }
10138 
10139 TEST(DECONVOLUTION_NHWC_F16, 3x3_setup_changing_width) {
10140   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10141   DeconvolutionOperatorTester()
10142     .batch_size(2)
10143     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
10144     .next_input_width(kUnstridedInputWidth + 3)
10145     .kernel_height(3)
10146     .kernel_width(5)
10147     .groups(2)
10148     .group_input_channels(15)
10149     .group_output_channels(17)
10150     .TestSetupF16();
10151 }
10152 
10153 /**************************** SUBCONV2D/IGEMM path ****************************/
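// Note (added for clarity): from here on the tests use stride > 1, which is expected to
// exercise the subconvolution (SUBCONV2D) decomposition backed by IGEMM micro-kernels
// instead of the unit-stride CONV path tested above.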
10154 
10155 TEST(DECONVOLUTION_NHWC_F16, 3x3s2) {
10156   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10157   DeconvolutionOperatorTester()
10158     .input_size(kStridedInputHeight, kStridedInputWidth)
10159     .padding(1)
10160     .kernel_size(3, 3)
10161     .stride(2)
10162     .group_input_channels(15)
10163     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10164     .iterations(3)
10165     .TestF16();
10166 }
10167 
10168 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_fp32_weights) {
10169   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10170   DeconvolutionOperatorTester()
10171     .input_size(kStridedInputHeight, kStridedInputWidth)
10172     .padding(1)
10173     .kernel_size(3, 3)
10174     .stride(2)
10175     .group_input_channels(15)
10176     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10177     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
10178     .iterations(3)
10179     .TestF16();
10180 }
10181 
10182 TEST(DECONVOLUTION_NHWC_F16, Kx3s2) {
10183   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10184   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
10185     DeconvolutionOperatorTester()
10186       .input_size(kStridedInputHeight, kStridedInputWidth)
10187       .padding_width(1)
10188       .kernel_size(kernel_height, 3)
10189       .stride(2)
10190       .group_input_channels(17)
10191       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10192       .iterations(3)
10193       .TestF16();
10194   }
10195 }
10196 
10197 TEST(DECONVOLUTION_NHWC_F16, 3xKs2) {
10198   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10199   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
10200     DeconvolutionOperatorTester()
10201       .input_size(kStridedInputHeight, kStridedInputWidth)
10202       .padding_height(1)
10203       .kernel_size(3, kernel_width)
10204       .stride(2)
10205       .group_input_channels(17)
10206       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10207       .iterations(3)
10208       .TestF16();
10209   }
10210 }
10211 
10212 TEST(DECONVOLUTION_NHWC_F16, 3x3sSx1) {
10213   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10214   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
10215     DeconvolutionOperatorTester()
10216       .input_size(kStridedInputHeight, kStridedInputWidth)
10217       .padding(1)
10218       .padding_width(1)
10219       .kernel_size(3, 3)
10220       .stride_height(stride_height)
10221       .group_input_channels(17)
10222       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10223       .iterations(3)
10224       .TestF16();
10225   }
10226 }
10227 
10228 TEST(DECONVOLUTION_NHWC_F16, 3x3s1xS) {
10229   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10230   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
10231     DeconvolutionOperatorTester()
10232       .input_size(kStridedInputHeight, kStridedInputWidth)
10233       .padding(1)
10234       .padding_width(1)
10235       .kernel_size(3, 3)
10236       .stride_width(stride_width)
10237       .group_input_channels(17)
10238       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10239       .iterations(3)
10240       .TestF16();
10241   }
10242 }
10243 
10244 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_height_padding) {
10245   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10246   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
10247     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
10248       DeconvolutionOperatorTester()
10249         .input_size(kStridedInputHeight, kStridedInputWidth)
10250         .padding_width(1)
10251         .padding_top(padding_top)
10252         .padding_bottom(padding_bottom)
10253         .kernel_size(3, 3)
10254         .stride(2)
10255         .group_input_channels(15)
10256         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10257         .iterations(1)
10258         .TestF16();
10259     }
10260   }
10261 }
10262 
10263 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_width_padding) {
10264   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10265   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
10266     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
10267       DeconvolutionOperatorTester()
10268         .input_size(kStridedInputHeight, kStridedInputWidth)
10269         .padding_height(1)
10270         .padding_left(padding_left)
10271         .padding_right(padding_right)
10272         .kernel_size(3, 3)
10273         .stride(2)
10274         .group_input_channels(15)
10275         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10276         .iterations(1)
10277         .TestF16();
10278     }
10279   }
10280 }
10281 
10282 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_height_adjustment) {
10283   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10284   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
10285     DeconvolutionOperatorTester()
10286       .input_size(kStridedInputHeight, kStridedInputWidth)
10287       .padding(1)
10288       .adjustment_height(adjustment_height)
10289       .kernel_size(3, 3)
10290       .stride(2)
10291       .group_input_channels(15)
10292       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10293       .iterations(1)
10294       .TestF16();
10295   }
10296 }
10297 
10298 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_width_adjustment) {
10299   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10300   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
10301     DeconvolutionOperatorTester()
10302       .input_size(kStridedInputHeight, kStridedInputWidth)
10303       .padding(1)
10304       .adjustment_width(adjustment_width)
10305       .kernel_size(3, 3)
10306       .stride(2)
10307       .group_input_channels(15)
10308       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10309       .iterations(1)
10310       .TestF16();
10311   }
10312 }
10313 
10314 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_input_height) {
10315   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10316   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
10317     DeconvolutionOperatorTester()
10318       .input_size(input_height, kStridedInputWidth)
10319       .padding(1)
10320       .kernel_size(3, 3)
10321       .stride(2)
10322       .group_input_channels(15)
10323       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10324       .iterations(1)
10325       .TestF16();
10326   }
10327 }
10328 
10329 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_input_width) {
10330   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10331   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
10332     DeconvolutionOperatorTester()
10333       .input_size(kStridedInputHeight, input_width)
10334       .padding(1)
10335       .kernel_size(3, 3)
10336       .stride(2)
10337       .group_input_channels(15)
10338       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10339       .iterations(1)
10340       .TestF16();
10341   }
10342 }
10343 
10344 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_input_channels) {
10345   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10346   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
10347     DeconvolutionOperatorTester()
10348       .input_size(kStridedInputHeight, kStridedInputWidth)
10349       .padding(1)
10350       .kernel_size(3, 3)
10351       .stride(2)
10352       .group_input_channels(input_channels)
10353       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10354       .iterations(1)
10355       .TestF16();
10356   }
10357 }
10358 
10359 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_varying_output_channels) {
10360   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10361   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
10362     DeconvolutionOperatorTester()
10363       .input_size(kStridedInputHeight, kStridedInputWidth)
10364       .padding(1)
10365       .kernel_size(3, 3)
10366       .stride(2)
10367       .group_input_channels(23)
10368       .group_output_channels(output_channels)
10369       .iterations(1)
10370       .TestF16();
10371   }
10372 }
10373 
10374 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_input_stride) {
10375   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10376   DeconvolutionOperatorTester()
10377     .input_size(kStridedInputHeight, kStridedInputWidth)
10378     .padding(1)
10379     .kernel_size(3, 3)
10380     .stride(2)
10381     .group_input_channels(23)
10382     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10383     .input_pixel_stride(28)
10384     .iterations(3)
10385     .TestF16();
10386 }
10387 
10388 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_output_stride) {
10389   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10390   DeconvolutionOperatorTester()
10391     .input_size(kStridedInputHeight, kStridedInputWidth)
10392     .padding(1)
10393     .kernel_size(3, 3)
10394     .stride(2)
10395     .group_input_channels(23)
10396     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10397     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
10398     .iterations(3)
10399     .TestF16();
10400 }
10401 
10402 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_qmin) {
10403   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10404   DeconvolutionOperatorTester()
10405     .input_size(kStridedInputHeight, kStridedInputWidth)
10406     .padding(1)
10407     .kernel_size(3, 3)
10408     .stride(2)
10409     .group_input_channels(23)
10410     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10411     .qmin(128)
10412     .iterations(3)
10413     .TestF16();
10414 }
10415 
10416 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_with_qmax) {
10417   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10418   DeconvolutionOperatorTester()
10419     .input_size(kStridedInputHeight, kStridedInputWidth)
10420     .padding(1)
10421     .kernel_size(3, 3)
10422     .stride(2)
10423     .group_input_channels(23)
10424     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10425     .qmax(128)
10426     .iterations(3)
10427     .TestF16();
10428 }
10429 
10430 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_without_bias) {
10431   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10432   DeconvolutionOperatorTester()
10433     .has_bias(false)
10434     .input_size(kStridedInputHeight, kStridedInputWidth)
10435     .padding(1)
10436     .kernel_size(3, 3)
10437     .stride(2)
10438     .group_input_channels(23)
10439     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10440     .iterations(3)
10441     .TestF16();
10442 }
10443 
10444 TEST(DECONVOLUTION_NHWC_F16, weights_cache_3x3s2) {
10445   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10446   DeconvolutionOperatorTester()
10447     .input_size(kStridedInputHeight, kStridedInputWidth)
10448     .padding(1)
10449     .kernel_size(3, 3)
10450     .stride(2)
10451     .group_input_channels(15)
10452     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10453     .use_weights_cache(true)
10454     .iterations(3)
10455     .TestF16();
10456 }
10457 
10458 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
10459 
10460 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2) {
10461   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10462   DeconvolutionOperatorTester()
10463     .input_size(kStridedInputHeight, kStridedInputWidth)
10464     .padding(1)
10465     .kernel_size(3, 3)
10466     .stride(2)
10467     .groups(2)
10468     .group_input_channels(17)
10469     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10470     .iterations(3)
10471     .TestF16();
10472 }
10473 
10474 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_fp32_weights) {
10475   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10476   DeconvolutionOperatorTester()
10477     .input_size(kStridedInputHeight, kStridedInputWidth)
10478     .padding(1)
10479     .kernel_size(3, 3)
10480     .stride(2)
10481     .groups(2)
10482     .group_input_channels(17)
10483     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10484     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
10485     .iterations(3)
10486     .TestF16();
10487 }
10488 
10489 TEST(DECONVOLUTION_NHWC_F16, grouped_Kx3s2) {
10490   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10491   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
10492     DeconvolutionOperatorTester()
10493       .input_size(kStridedInputHeight, kStridedInputWidth)
10494       .padding_width(1)
10495       .kernel_size(kernel_height, 3)
10496       .stride(2)
10497       .groups(2)
10498       .group_input_channels(17)
10499       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10500       .iterations(3)
10501       .TestF16();
10502   }
10503 }
10504 
10505 TEST(DECONVOLUTION_NHWC_F16, grouped_3xKs2) {
10506   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10507   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
10508     DeconvolutionOperatorTester()
10509       .input_size(kStridedInputHeight, kStridedInputWidth)
10510       .padding_height(1)
10511       .kernel_size(3, kernel_width)
10512       .stride(2)
10513       .groups(2)
10514       .group_input_channels(17)
10515       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10516       .iterations(3)
10517       .TestF16();
10518   }
10519 }
10520 
10521 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3sSx1) {
10522   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10523   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
10524     DeconvolutionOperatorTester()
10525       .input_size(kStridedInputHeight, kStridedInputWidth)
10526       .padding(1)
10527       .padding_width(1)
10528       .kernel_size(3, 3)
10529       .stride_height(stride_height)
10530       .groups(2)
10531       .group_input_channels(17)
10532       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10533       .iterations(3)
10534       .TestF16();
10535   }
10536 }
10537 
10538 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s1xS) {
10539   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10540   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
10541     DeconvolutionOperatorTester()
10542       .input_size(kStridedInputHeight, kStridedInputWidth)
10543       .padding(1)
10544       .padding_width(1)
10545       .kernel_size(3, 3)
10546       .stride_width(stride_width)
10547       .groups(2)
10548       .group_input_channels(17)
10549       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10550       .iterations(3)
10551       .TestF16();
10552   }
10553 }
10554 
10555 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_height_padding) {
10556   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10557   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
10558     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
10559       DeconvolutionOperatorTester()
10560         .input_size(kStridedInputHeight, kStridedInputWidth)
10561         .padding_width(1)
10562         .padding_top(padding_top)
10563         .padding_bottom(padding_bottom)
10564         .kernel_size(3, 3)
10565         .stride(2)
10566         .groups(2)
10567         .group_input_channels(17)
10568         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10569         .iterations(1)
10570         .TestF16();
10571     }
10572   }
10573 }
10574 
10575 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_width_padding) {
10576   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10577   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
10578     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
10579       DeconvolutionOperatorTester()
10580         .input_size(kStridedInputHeight, kStridedInputWidth)
10581         .padding_height(1)
10582         .padding_left(padding_left)
10583         .padding_right(padding_right)
10584         .kernel_size(3, 3)
10585         .stride(2)
10586         .groups(2)
10587         .group_input_channels(17)
10588         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10589         .iterations(1)
10590         .TestF16();
10591     }
10592   }
10593 }
10594 
10595 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_height_adjustment) {
10596   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10597   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
10598     DeconvolutionOperatorTester()
10599       .input_size(kStridedInputHeight, kStridedInputWidth)
10600       .padding(1)
10601       .adjustment_height(adjustment_height)
10602       .kernel_size(3, 3)
10603       .stride(2)
10604       .groups(2)
10605       .group_input_channels(17)
10606       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10607       .iterations(1)
10608       .TestF16();
10609   }
10610 }
10611 
10612 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_width_adjustment) {
10613   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10614   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
10615     DeconvolutionOperatorTester()
10616       .input_size(kStridedInputHeight, kStridedInputWidth)
10617       .padding(1)
10618       .adjustment_width(adjustment_width)
10619       .kernel_size(3, 3)
10620       .stride(2)
10621       .groups(2)
10622       .group_input_channels(17)
10623       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10624       .iterations(1)
10625       .TestF16();
10626   }
10627 }
10628 
10629 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_input_height) {
10630   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10631   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
10632     DeconvolutionOperatorTester()
10633       .input_size(input_height, kStridedInputWidth)
10634       .padding(1)
10635       .kernel_size(3, 3)
10636       .stride(2)
10637       .groups(2)
10638       .group_input_channels(17)
10639       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10640       .iterations(1)
10641       .TestF16();
10642   }
10643 }
10644 
10645 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_input_width) {
10646   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10647   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
10648     DeconvolutionOperatorTester()
10649       .input_size(kStridedInputHeight, input_width)
10650       .padding(1)
10651       .kernel_size(3, 3)
10652       .stride(2)
10653       .groups(2)
10654       .group_input_channels(17)
10655       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10656       .iterations(1)
10657       .TestF16();
10658   }
10659 }
10660 
10661 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_input_channels) {
10662   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10663   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
10664     DeconvolutionOperatorTester()
10665       .input_size(kStridedInputHeight, kStridedInputWidth)
10666       .padding(1)
10667       .kernel_size(3, 3)
10668       .stride(2)
10669       .groups(2)
10670       .group_input_channels(input_channels)
10671       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10672       .iterations(1)
10673       .TestF16();
10674   }
10675 }
10676 
10677 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_varying_output_channels) {
10678   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10679   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
10680     DeconvolutionOperatorTester()
10681       .input_size(kStridedInputHeight, kStridedInputWidth)
10682       .padding(1)
10683       .kernel_size(3, 3)
10684       .stride(2)
10685       .groups(2)
10686       .group_input_channels(17)
10687       .group_output_channels(output_channels)
10688       .iterations(1)
10689       .TestF16();
10690   }
10691 }
10692 
10693 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_input_stride) {
10694   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10695   DeconvolutionOperatorTester()
10696     .input_size(kStridedInputHeight, kStridedInputWidth)
10697     .padding(1)
10698     .kernel_size(3, 3)
10699     .stride(2)
10700     .groups(2)
10701     .group_input_channels(17)
10702     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10703     .input_pixel_stride(37)
10704     .iterations(3)
10705     .TestF16();
10706 }
10707 
10708 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_output_stride) {
10709   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10710   DeconvolutionOperatorTester()
10711     .input_size(kStridedInputHeight, kStridedInputWidth)
10712     .padding(1)
10713     .kernel_size(3, 3)
10714     .stride(2)
10715     .groups(2)
10716     .group_input_channels(17)
10717     .group_output_channels(xnn_params.f16.gemm.nr + 3)
10718     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
10719     .iterations(3)
10720     .TestF16();
10721 }
10722 
10723 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_qmin) {
10724   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10725   DeconvolutionOperatorTester()
10726     .input_size(kStridedInputHeight, kStridedInputWidth)
10727     .padding(1)
10728     .kernel_size(3, 3)
10729     .stride(2)
10730     .groups(2)
10731     .group_input_channels(17)
10732     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10733     .qmin(128)
10734     .iterations(3)
10735     .TestF16();
10736 }
10737 
10738 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_with_qmax) {
10739   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10740   DeconvolutionOperatorTester()
10741     .input_size(kStridedInputHeight, kStridedInputWidth)
10742     .padding(1)
10743     .kernel_size(3, 3)
10744     .stride(2)
10745     .groups(2)
10746     .group_input_channels(17)
10747     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10748     .qmax(128)
10749     .iterations(3)
10750     .TestF16();
10751 }
10752 
10753 TEST(DECONVOLUTION_NHWC_F16, grouped_3x3s2_without_bias) {
10754   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10755   DeconvolutionOperatorTester()
10756     .has_bias(false)
10757     .input_size(kStridedInputHeight, kStridedInputWidth)
10758     .padding(1)
10759     .kernel_size(3, 3)
10760     .stride(2)
10761     .groups(2)
10762     .group_input_channels(17)
10763     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10764     .iterations(3)
10765     .TestF16();
10766 }
10767 
10768 TEST(DECONVOLUTION_NHWC_F16, weights_cache_grouped_3x3s2) {
10769   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10770   DeconvolutionOperatorTester()
10771     .input_size(kStridedInputHeight, kStridedInputWidth)
10772     .padding(1)
10773     .kernel_size(3, 3)
10774     .stride(2)
10775     .groups(2)
10776     .group_input_channels(17)
10777     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10778     .use_weights_cache(true)
10779     .iterations(3)
10780     .TestF16();
10781 }
10782 
10783 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
10784 
10785 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2) {
10786   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10787   DeconvolutionOperatorTester()
10788     .batch_size(2)
10789     .input_size(kStridedInputHeight, kStridedInputWidth)
10790     .padding(1)
10791     .kernel_size(3, 3)
10792     .stride(2)
10793     .group_input_channels(15)
10794     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10795     .iterations(3)
10796     .TestF16();
10797 }
10798 
10799 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_fp32_weights) {
10800   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10801   DeconvolutionOperatorTester()
10802     .batch_size(2)
10803     .input_size(kStridedInputHeight, kStridedInputWidth)
10804     .padding(1)
10805     .kernel_size(3, 3)
10806     .stride(2)
10807     .group_input_channels(15)
10808     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10809     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
10810     .iterations(3)
10811     .TestF16();
10812 }
10813 
10814 TEST(DECONVOLUTION_NHWC_F16, batched_Kx3s2) {
10815   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10816   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
10817     DeconvolutionOperatorTester()
10818       .batch_size(2)
10819       .input_size(kStridedInputHeight, kStridedInputWidth)
10820       .padding_width(1)
10821       .kernel_size(kernel_height, 3)
10822       .stride(2)
10823       .group_input_channels(17)
10824       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10825       .iterations(3)
10826       .TestF16();
10827   }
10828 }
10829 
10830 TEST(DECONVOLUTION_NHWC_F16, batched_3xKs2) {
10831   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10832   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
10833     DeconvolutionOperatorTester()
10834       .batch_size(2)
10835       .input_size(kStridedInputHeight, kStridedInputWidth)
10836       .padding_height(1)
10837       .kernel_size(3, kernel_width)
10838       .stride(2)
10839       .group_input_channels(17)
10840       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10841       .iterations(3)
10842       .TestF16();
10843   }
10844 }
10845 
10846 TEST(DECONVOLUTION_NHWC_F16, batched_3x3sSx1) {
10847   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10848   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
10849     DeconvolutionOperatorTester()
10850       .batch_size(2)
10851       .input_size(kStridedInputHeight, kStridedInputWidth)
10852       .padding(1)
10853       .padding_width(1)
10854       .kernel_size(3, 3)
10855       .stride_height(stride_height)
10856       .group_input_channels(17)
10857       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10858       .iterations(3)
10859       .TestF16();
10860   }
10861 }
10862 
10863 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s1xS) {
10864   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10865   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
10866     DeconvolutionOperatorTester()
10867       .batch_size(2)
10868       .input_size(kStridedInputHeight, kStridedInputWidth)
10869       .padding(1)
10870       .padding_width(1)
10871       .kernel_size(3, 3)
10872       .stride_width(stride_width)
10873       .group_input_channels(17)
10874       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10875       .iterations(3)
10876       .TestF16();
10877   }
10878 }
10879 
10880 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_height_padding) {
10881   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10882   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
10883     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
10884       DeconvolutionOperatorTester()
10885         .batch_size(2)
10886         .input_size(kStridedInputHeight, kStridedInputWidth)
10887         .padding_width(1)
10888         .padding_top(padding_top)
10889         .padding_bottom(padding_bottom)
10890         .kernel_size(3, 3)
10891         .stride(2)
10892         .group_input_channels(15)
10893         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10894         .iterations(1)
10895         .TestF16();
10896     }
10897   }
10898 }
10899 
10900 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_width_padding) {
10901   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10902   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
10903     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
10904       DeconvolutionOperatorTester()
10905         .batch_size(2)
10906         .input_size(kStridedInputHeight, kStridedInputWidth)
10907         .padding_height(1)
10908         .padding_left(padding_left)
10909         .padding_right(padding_right)
10910         .kernel_size(3, 3)
10911         .stride(2)
10912         .group_input_channels(15)
10913         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10914         .iterations(1)
10915         .TestF16();
10916     }
10917   }
10918 }
10919 
10920 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_height_adjustment) {
10921   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10922   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
10923     DeconvolutionOperatorTester()
10924       .batch_size(2)
10925       .input_size(kStridedInputHeight, kStridedInputWidth)
10926       .padding(1)
10927       .adjustment_height(adjustment_height)
10928       .kernel_size(3, 3)
10929       .stride(2)
10930       .group_input_channels(15)
10931       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10932       .iterations(1)
10933       .TestF16();
10934   }
10935 }
10936 
10937 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_width_adjustment) {
10938   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10939   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
10940     DeconvolutionOperatorTester()
10941       .batch_size(2)
10942       .input_size(kStridedInputHeight, kStridedInputWidth)
10943       .padding(1)
10944       .adjustment_width(adjustment_width)
10945       .kernel_size(3, 3)
10946       .stride(2)
10947       .group_input_channels(15)
10948       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10949       .iterations(1)
10950       .TestF16();
10951   }
10952 }
10953 
10954 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_input_height) {
10955   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10956   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
10957     DeconvolutionOperatorTester()
10958       .batch_size(2)
10959       .input_size(input_height, kStridedInputWidth)
10960       .padding(1)
10961       .kernel_size(3, 3)
10962       .stride(2)
10963       .group_input_channels(15)
10964       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10965       .iterations(1)
10966       .TestF16();
10967   }
10968 }
10969 
10970 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_input_width) {
10971   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10972   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
10973     DeconvolutionOperatorTester()
10974       .batch_size(2)
10975       .input_size(kStridedInputHeight, input_width)
10976       .padding(1)
10977       .kernel_size(3, 3)
10978       .stride(2)
10979       .group_input_channels(15)
10980       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10981       .iterations(1)
10982       .TestF16();
10983   }
10984 }
10985 
10986 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_input_channels) {
10987   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
10988   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
10989     DeconvolutionOperatorTester()
10990       .batch_size(2)
10991       .input_size(kStridedInputHeight, kStridedInputWidth)
10992       .padding(1)
10993       .kernel_size(3, 3)
10994       .stride(2)
10995       .group_input_channels(input_channels)
10996       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
10997       .iterations(1)
10998       .TestF16();
10999   }
11000 }
11001 
11002 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_varying_output_channels) {
11003   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11004   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11005     DeconvolutionOperatorTester()
11006       .batch_size(2)
11007       .input_size(kStridedInputHeight, kStridedInputWidth)
11008       .padding(1)
11009       .kernel_size(3, 3)
11010       .stride(2)
11011       .group_input_channels(23)
11012       .group_output_channels(output_channels)
11013       .iterations(1)
11014       .TestF16();
11015   }
11016 }
11017 
11018 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_input_stride) {
11019   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11020   DeconvolutionOperatorTester()
11021     .batch_size(2)
11022     .input_size(kStridedInputHeight, kStridedInputWidth)
11023     .padding(1)
11024     .kernel_size(3, 3)
11025     .stride(2)
11026     .group_input_channels(23)
11027     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11028     .input_pixel_stride(28)
11029     .iterations(3)
11030     .TestF16();
11031 }
11032 
11033 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_output_stride) {
11034   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11035   DeconvolutionOperatorTester()
11036     .batch_size(2)
11037     .input_size(kStridedInputHeight, kStridedInputWidth)
11038     .padding(1)
11039     .kernel_size(3, 3)
11040     .stride(2)
11041     .group_input_channels(23)
11042     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11043     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11044     .iterations(3)
11045     .TestF16();
11046 }
11047 
11048 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_qmin) {
11049   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11050   DeconvolutionOperatorTester()
11051     .batch_size(2)
11052     .input_size(kStridedInputHeight, kStridedInputWidth)
11053     .padding(1)
11054     .kernel_size(3, 3)
11055     .stride(2)
11056     .group_input_channels(23)
11057     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11058     .qmin(128)
11059     .iterations(3)
11060     .TestF16();
11061 }
11062 
11063 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_with_qmax) {
11064   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11065   DeconvolutionOperatorTester()
11066     .batch_size(2)
11067     .input_size(kStridedInputHeight, kStridedInputWidth)
11068     .padding(1)
11069     .kernel_size(3, 3)
11070     .stride(2)
11071     .group_input_channels(23)
11072     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11073     .qmax(128)
11074     .iterations(3)
11075     .TestF16();
11076 }
11077 
11078 TEST(DECONVOLUTION_NHWC_F16, batched_3x3s2_without_bias) {
11079   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11080   DeconvolutionOperatorTester()
11081     .has_bias(false)
11082     .batch_size(2)
11083     .input_size(kStridedInputHeight, kStridedInputWidth)
11084     .padding(1)
11085     .kernel_size(3, 3)
11086     .stride(2)
11087     .group_input_channels(23)
11088     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11089     .iterations(3)
11090     .TestF16();
11091 }
11092 
11093 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_3x3s2) {
11094   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11095   DeconvolutionOperatorTester()
11096     .batch_size(2)
11097     .input_size(kStridedInputHeight, kStridedInputWidth)
11098     .padding(1)
11099     .kernel_size(3, 3)
11100     .stride(2)
11101     .group_input_channels(15)
11102     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11103     .use_weights_cache(true)
11104     .iterations(3)
11105     .TestF16();
11106 }
11107 
11108 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
11109 
11110 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2) {
11111   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11112   DeconvolutionOperatorTester()
11113     .batch_size(2)
11114     .input_size(kStridedInputHeight, kStridedInputWidth)
11115     .padding(1)
11116     .kernel_size(3, 3)
11117     .stride(2)
11118     .groups(2)
11119     .group_input_channels(17)
11120     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11121     .iterations(3)
11122     .TestF16();
11123 }
11124 
11125 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_fp32_weights) {
11126   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11127   DeconvolutionOperatorTester()
11128     .batch_size(2)
11129     .input_size(kStridedInputHeight, kStridedInputWidth)
11130     .padding(1)
11131     .kernel_size(3, 3)
11132     .stride(2)
11133     .groups(2)
11134     .group_input_channels(17)
11135     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11136     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11137     .iterations(3)
11138     .TestF16();
11139 }
11140 
11141 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_Kx3s2) {
11142   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11143   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
11144     DeconvolutionOperatorTester()
11145       .batch_size(2)
11146       .input_size(kStridedInputHeight, kStridedInputWidth)
11147       .padding_width(1)
11148       .kernel_size(kernel_height, 3)
11149       .stride(2)
11150       .groups(2)
11151       .group_input_channels(17)
11152       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11153       .iterations(3)
11154       .TestF16();
11155   }
11156 }
11157 
11158 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3xKs2) {
11159   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11160   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
11161     DeconvolutionOperatorTester()
11162       .batch_size(2)
11163       .input_size(kStridedInputHeight, kStridedInputWidth)
11164       .padding_height(1)
11165       .kernel_size(3, kernel_width)
11166       .stride(2)
11167       .groups(2)
11168       .group_input_channels(17)
11169       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11170       .iterations(3)
11171       .TestF16();
11172   }
11173 }
11174 
11175 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3sSx1) {
11176   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11177   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
11178     DeconvolutionOperatorTester()
11179       .batch_size(2)
11180       .input_size(kStridedInputHeight, kStridedInputWidth)
11181       .padding(1)
11182       .padding_width(1)
11183       .kernel_size(3, 3)
11184       .stride_height(stride_height)
11185       .groups(2)
11186       .group_input_channels(17)
11187       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11188       .iterations(3)
11189       .TestF16();
11190   }
11191 }
11192 
11193 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s1xS) {
11194   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11195   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
11196     DeconvolutionOperatorTester()
11197       .batch_size(2)
11198       .input_size(kStridedInputHeight, kStridedInputWidth)
11199       .padding(1)
11200       .padding_width(1)
11201       .kernel_size(3, 3)
11202       .stride_width(stride_width)
11203       .groups(2)
11204       .group_input_channels(17)
11205       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11206       .iterations(3)
11207       .TestF16();
11208   }
11209 }
11210 
11211 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_height_padding) {
11212   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11213   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
11214     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
11215       DeconvolutionOperatorTester()
11216         .batch_size(2)
11217         .input_size(kStridedInputHeight, kStridedInputWidth)
11218         .padding_width(1)
11219         .padding_top(padding_top)
11220         .padding_bottom(padding_bottom)
11221         .kernel_size(3, 3)
11222         .stride(2)
11223         .groups(2)
11224         .group_input_channels(17)
11225         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11226         .iterations(1)
11227         .TestF16();
11228     }
11229   }
11230 }
11231 
11232 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_width_padding) {
11233   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11234   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
11235     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
11236       DeconvolutionOperatorTester()
11237         .batch_size(2)
11238         .input_size(kStridedInputHeight, kStridedInputWidth)
11239         .padding_height(1)
11240         .padding_left(padding_left)
11241         .padding_right(padding_right)
11242         .kernel_size(3, 3)
11243         .stride(2)
11244         .groups(2)
11245         .group_input_channels(17)
11246         .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11247         .iterations(1)
11248         .TestF16();
11249     }
11250   }
11251 }
11252 
11253 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_height_adjustment) {
11254   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11255   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
11256     DeconvolutionOperatorTester()
11257       .batch_size(2)
11258       .input_size(kStridedInputHeight, kStridedInputWidth)
11259       .padding(1)
11260       .adjustment_height(adjustment_height)
11261       .kernel_size(3, 3)
11262       .stride(2)
11263       .groups(2)
11264       .group_input_channels(17)
11265       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11266       .iterations(1)
11267       .TestF16();
11268   }
11269 }
11270 
11271 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_width_adjustment) {
11272   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11273   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
11274     DeconvolutionOperatorTester()
11275       .batch_size(2)
11276       .input_size(kStridedInputHeight, kStridedInputWidth)
11277       .padding(1)
11278       .adjustment_width(adjustment_width)
11279       .kernel_size(3, 3)
11280       .stride(2)
11281       .groups(2)
11282       .group_input_channels(17)
11283       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11284       .iterations(1)
11285       .TestF16();
11286   }
11287 }
11288 
11289 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_input_height) {
11290   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11291   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
11292     DeconvolutionOperatorTester()
11293       .batch_size(2)
11294       .input_size(input_height, kStridedInputWidth)
11295       .padding(1)
11296       .kernel_size(3, 3)
11297       .stride(2)
11298       .groups(2)
11299       .group_input_channels(17)
11300       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11301       .iterations(1)
11302       .TestF16();
11303   }
11304 }
11305 
11306 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_input_width) {
11307   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11308   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
11309     DeconvolutionOperatorTester()
11310       .batch_size(2)
11311       .input_size(kStridedInputHeight, input_width)
11312       .padding(1)
11313       .kernel_size(3, 3)
11314       .stride(2)
11315       .groups(2)
11316       .group_input_channels(17)
11317       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11318       .iterations(1)
11319       .TestF16();
11320   }
11321 }
11322 
11323 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_input_channels) {
11324   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11325   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
11326     DeconvolutionOperatorTester()
11327       .batch_size(2)
11328       .input_size(kStridedInputHeight, kStridedInputWidth)
11329       .padding(1)
11330       .kernel_size(3, 3)
11331       .stride(2)
11332       .groups(2)
11333       .group_input_channels(input_channels)
11334       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11335       .iterations(1)
11336       .TestF16();
11337   }
11338 }
11339 
11340 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_varying_output_channels) {
11341   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11342   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11343     DeconvolutionOperatorTester()
11344       .batch_size(2)
11345       .input_size(kStridedInputHeight, kStridedInputWidth)
11346       .padding(1)
11347       .kernel_size(3, 3)
11348       .stride(2)
11349       .groups(2)
11350       .group_input_channels(17)
11351       .group_output_channels(output_channels)
11352       .iterations(1)
11353       .TestF16();
11354   }
11355 }
11356 
11357 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_input_stride) {
11358   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11359   DeconvolutionOperatorTester()
11360     .batch_size(2)
11361     .input_size(kStridedInputHeight, kStridedInputWidth)
11362     .padding(1)
11363     .kernel_size(3, 3)
11364     .stride(2)
11365     .groups(2)
11366     .group_input_channels(17)
11367     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11368     .input_pixel_stride(37)
11369     .iterations(3)
11370     .TestF16();
11371 }
11372 
11373 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_output_stride) {
11374   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11375   DeconvolutionOperatorTester()
11376     .batch_size(2)
11377     .input_size(kStridedInputHeight, kStridedInputWidth)
11378     .padding(1)
11379     .kernel_size(3, 3)
11380     .stride(2)
11381     .groups(2)
11382     .group_input_channels(17)
11383     .group_output_channels(xnn_params.f16.gemm.nr + 3)
11384     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11385     .iterations(3)
11386     .TestF16();
11387 }
11388 
11389 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_qmin) {
11390   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11391   DeconvolutionOperatorTester()
11392     .batch_size(2)
11393     .input_size(kStridedInputHeight, kStridedInputWidth)
11394     .padding(1)
11395     .kernel_size(3, 3)
11396     .stride(2)
11397     .groups(2)
11398     .group_input_channels(17)
11399     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11400     .qmin(128)
11401     .iterations(3)
11402     .TestF16();
11403 }
11404 
11405 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_with_qmax) {
11406   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11407   DeconvolutionOperatorTester()
11408     .batch_size(2)
11409     .input_size(kStridedInputHeight, kStridedInputWidth)
11410     .padding(1)
11411     .kernel_size(3, 3)
11412     .stride(2)
11413     .groups(2)
11414     .group_input_channels(17)
11415     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11416     .qmax(128)
11417     .iterations(3)
11418     .TestF16();
11419 }
11420 
11421 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_3x3s2_without_bias) {
11422   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11423   DeconvolutionOperatorTester()
11424     .has_bias(false)
11425     .batch_size(2)
11426     .input_size(kStridedInputHeight, kStridedInputWidth)
11427     .padding(1)
11428     .kernel_size(3, 3)
11429     .stride(2)
11430     .groups(2)
11431     .group_input_channels(17)
11432     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11433     .iterations(3)
11434     .TestF16();
11435 }
11436 
11437 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_grouped_3x3s2) {
11438   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11439   DeconvolutionOperatorTester()
11440     .batch_size(2)
11441     .input_size(kStridedInputHeight, kStridedInputWidth)
11442     .padding(1)
11443     .kernel_size(3, 3)
11444     .stride(2)
11445     .groups(2)
11446     .group_input_channels(17)
11447     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11448     .use_weights_cache(true)
11449     .iterations(3)
11450     .TestF16();
11451 }
11452 
11453 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
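// The setup tests below create the operator for one input shape, then change the
// geometry via next_input_height()/next_input_width() and run it again; TestSetupF16()
// is expected to verify that re-setup with the new shape still produces correct results
// without rebuilding the operator.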
11454 
11455 
11456 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_setup_changing_height) {
11457   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11458   DeconvolutionOperatorTester()
11459     .batch_size(2)
11460     .input_size(kStridedInputHeight, kStridedInputWidth)
11461     .next_input_height(kStridedInputHeight + 3)
11462     .kernel_size(3, 3)
11463     .stride(2)
11464     .groups(2)
11465     .group_input_channels(15)
11466     .group_output_channels(17)
11467     .TestSetupF16();
11468 }
11469 
11470 TEST(DECONVOLUTION_NHWC_F16, 3x3s2_setup_changing_width) {
11471   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11472   DeconvolutionOperatorTester()
11473     .batch_size(2)
11474     .input_size(kStridedInputHeight, kStridedInputWidth)
11475     .next_input_width(kStridedInputWidth + 3)
11476     .kernel_size(3, 3)
11477     .stride(2)
11478     .groups(2)
11479     .group_input_channels(15)
11480     .group_output_channels(17)
11481     .TestSetupF16();
11482 }
11483 
11484 /**************************** SUBCONV2D/GEMM path ****************************/
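// In this section the kernel size matches the stride (2x2 s2, Kx2 sKx2, 2xK s2xK), so the
// transposed-convolution windows written by neighboring input pixels do not overlap; each
// output sub-grid can then be computed as a plain GEMM rather than an indirect GEMM, which
// is why these cases map to the SUBCONV2D/GEMM path.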
11485 
11486 TEST(DECONVOLUTION_NHWC_F16, 2x2s2) {
11487   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11488   DeconvolutionOperatorTester()
11489     .input_size(kStridedInputHeight, kStridedInputWidth)
11490     .kernel_size(2, 2)
11491     .stride(2)
11492     .group_input_channels(15)
11493     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11494     .iterations(3)
11495     .TestF16();
11496 }
11497 
11498 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_fp32_weights) {
11499   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11500   DeconvolutionOperatorTester()
11501     .input_size(kStridedInputHeight, kStridedInputWidth)
11502     .kernel_size(2, 2)
11503     .stride(2)
11504     .group_input_channels(15)
11505     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11506     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11507     .iterations(3)
11508     .TestF16();
11509 }
11510 
11511 TEST(DECONVOLUTION_NHWC_F16, Kx2sKx2) {
11512   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11513   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
11514     DeconvolutionOperatorTester()
11515       .input_size(kStridedInputHeight, kStridedInputWidth)
11516       .kernel_size(kernel_height, 2)
11517       .stride(kernel_height, 2)
11518       .group_input_channels(17)
11519       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11520       .iterations(3)
11521       .TestF16();
11522   }
11523 }
11524 
11525 TEST(DECONVOLUTION_NHWC_F16, 2xKs2xK) {
11526   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11527   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
11528     DeconvolutionOperatorTester()
11529       .input_size(kStridedInputHeight, kStridedInputWidth)
11530       .kernel_size(2, kernel_width)
11531       .stride(2, kernel_width)
11532       .group_input_channels(17)
11533       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11534       .iterations(3)
11535       .TestF16();
11536   }
11537 }
11538 
11539 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_height_adjustment) {
11540   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11541   DeconvolutionOperatorTester()
11542     .input_size(kStridedInputHeight, kStridedInputWidth)
11543     .adjustment_height(1)
11544     .kernel_size(2, 2)
11545     .stride(2)
11546     .group_input_channels(15)
11547     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11548     .iterations(1)
11549     .TestF16();
11550 }
11551 
11552 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_width_adjustment) {
11553   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11554   DeconvolutionOperatorTester()
11555     .input_size(kStridedInputHeight, kStridedInputWidth)
11556     .adjustment_width(1)
11557     .kernel_size(2, 2)
11558     .stride(2)
11559     .group_input_channels(15)
11560     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11561     .iterations(1)
11562     .TestF16();
11563 }
11564 
11565 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_input_height) {
11566   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11567   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
11568     DeconvolutionOperatorTester()
11569       .input_size(input_height, kStridedInputWidth)
11570       .kernel_size(2, 2)
11571       .stride(2)
11572       .group_input_channels(15)
11573       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11574       .iterations(1)
11575       .TestF16();
11576   }
11577 }
11578 
11579 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_input_width) {
11580   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11581   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
11582     DeconvolutionOperatorTester()
11583       .input_size(kStridedInputHeight, input_width)
11584       .kernel_size(2, 2)
11585       .stride(2)
11586       .group_input_channels(15)
11587       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11588       .iterations(1)
11589       .TestF16();
11590   }
11591 }
11592 
11593 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_input_channels) {
11594   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11595   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
11596     DeconvolutionOperatorTester()
11597       .input_size(kStridedInputHeight, kStridedInputWidth)
11598       .kernel_size(2, 2)
11599       .stride(2)
11600       .group_input_channels(input_channels)
11601       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11602       .iterations(1)
11603       .TestF16();
11604   }
11605 }
11606 
11607 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_varying_output_channels) {
11608   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11609   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11610     DeconvolutionOperatorTester()
11611       .input_size(kStridedInputHeight, kStridedInputWidth)
11612       .kernel_size(2, 2)
11613       .stride(2)
11614       .group_input_channels(23)
11615       .group_output_channels(output_channels)
11616       .iterations(1)
11617       .TestF16();
11618   }
11619 }
11620 
11621 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_input_stride) {
11622   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11623   DeconvolutionOperatorTester()
11624     .input_size(kStridedInputHeight, kStridedInputWidth)
11625     .kernel_size(2, 2)
11626     .stride(2)
11627     .group_input_channels(23)
11628     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11629     .input_pixel_stride(28)
11630     .iterations(3)
11631     .TestF16();
11632 }
11633 
11634 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_output_stride) {
11635   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11636   DeconvolutionOperatorTester()
11637     .input_size(kStridedInputHeight, kStridedInputWidth)
11638     .kernel_size(2, 2)
11639     .stride(2)
11640     .group_input_channels(23)
11641     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11642     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11643     .iterations(3)
11644     .TestF16();
11645 }
11646 
11647 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_qmin) {
11648   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11649   DeconvolutionOperatorTester()
11650     .input_size(kStridedInputHeight, kStridedInputWidth)
11651     .kernel_size(2, 2)
11652     .stride(2)
11653     .group_input_channels(23)
11654     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11655     .qmin(128)
11656     .iterations(3)
11657     .TestF16();
11658 }
11659 
11660 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_with_qmax) {
11661   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11662   DeconvolutionOperatorTester()
11663     .input_size(kStridedInputHeight, kStridedInputWidth)
11664     .kernel_size(2, 2)
11665     .stride(2)
11666     .group_input_channels(23)
11667     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11668     .qmax(128)
11669     .iterations(3)
11670     .TestF16();
11671 }
11672 
11673 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_without_bias) {
11674   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11675   DeconvolutionOperatorTester()
11676     .has_bias(false)
11677     .input_size(kStridedInputHeight, kStridedInputWidth)
11678     .kernel_size(2, 2)
11679     .stride(2)
11680     .group_input_channels(23)
11681     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11682     .iterations(3)
11683     .TestF16();
11684 }
11685 
11686 TEST(DECONVOLUTION_NHWC_F16, weights_cache_2x2s2) {
11687   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11688   DeconvolutionOperatorTester()
11689     .input_size(kStridedInputHeight, kStridedInputWidth)
11690     .kernel_size(2, 2)
11691     .stride(2)
11692     .group_input_channels(15)
11693     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11694     .use_weights_cache(true)
11695     .iterations(3)
11696     .TestF16();
11697 }
11698 
11699 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
11700 
11701 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2) {
11702   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11703   DeconvolutionOperatorTester()
11704     .input_size(kStridedInputHeight, kStridedInputWidth)
11705     .kernel_size(2, 2)
11706     .stride(2)
11707     .groups(2)
11708     .group_input_channels(17)
11709     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11710     .iterations(3)
11711     .TestF16();
11712 }
11713 
11714 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_fp32_weights) {
11715   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11716   DeconvolutionOperatorTester()
11717     .input_size(kStridedInputHeight, kStridedInputWidth)
11718     .kernel_size(2, 2)
11719     .stride(2)
11720     .groups(2)
11721     .group_input_channels(17)
11722     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11723     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11724     .iterations(3)
11725     .TestF16();
11726 }
11727 
11728 TEST(DECONVOLUTION_NHWC_F16, grouped_Kx2sKx2) {
11729   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11730   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
11731     DeconvolutionOperatorTester()
11732       .input_size(kStridedInputHeight, kStridedInputWidth)
11733       .kernel_size(kernel_height, 2)
11734       .stride(kernel_height, 2)
11735       .groups(2)
11736       .group_input_channels(17)
11737       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11738       .iterations(3)
11739       .TestF16();
11740   }
11741 }
11742 
11743 TEST(DECONVOLUTION_NHWC_F16, grouped_2xKs2xK) {
11744   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11745   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
11746     DeconvolutionOperatorTester()
11747       .input_size(kStridedInputHeight, kStridedInputWidth)
11748       .kernel_size(2, kernel_width)
11749       .stride(2, kernel_width)
11750       .groups(2)
11751       .group_input_channels(17)
11752       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11753       .iterations(3)
11754       .TestF16();
11755   }
11756 }
11757 
11758 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_height_adjustment) {
11759   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11760   DeconvolutionOperatorTester()
11761     .input_size(kStridedInputHeight, kStridedInputWidth)
11762     .adjustment_height(1)
11763     .kernel_size(2, 2)
11764     .stride(2)
11765     .groups(2)
11766     .group_input_channels(17)
11767     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11768     .iterations(1)
11769     .TestF16();
11770 }
11771 
11772 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_width_adjustment) {
11773   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11774   DeconvolutionOperatorTester()
11775     .input_size(kStridedInputHeight, kStridedInputWidth)
11776     .adjustment_width(1)
11777     .kernel_size(2, 2)
11778     .stride(2)
11779     .groups(2)
11780     .group_input_channels(17)
11781     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11782     .iterations(1)
11783     .TestF16();
11784 }
11785 
11786 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_input_height) {
11787   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11788   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
11789     DeconvolutionOperatorTester()
11790       .input_size(input_height, kStridedInputWidth)
11791       .kernel_size(2, 2)
11792       .stride(2)
11793       .groups(2)
11794       .group_input_channels(17)
11795       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11796       .iterations(1)
11797       .TestF16();
11798   }
11799 }
11800 
11801 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_input_width) {
11802   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11803   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
11804     DeconvolutionOperatorTester()
11805       .input_size(kStridedInputHeight, input_width)
11806       .kernel_size(2, 2)
11807       .stride(2)
11808       .groups(2)
11809       .group_input_channels(17)
11810       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11811       .iterations(1)
11812       .TestF16();
11813   }
11814 }
11815 
11816 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_input_channels) {
11817   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11818   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
11819     DeconvolutionOperatorTester()
11820       .input_size(kStridedInputHeight, kStridedInputWidth)
11821       .kernel_size(2, 2)
11822       .stride(2)
11823       .groups(2)
11824       .group_input_channels(input_channels)
11825       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11826       .iterations(1)
11827       .TestF16();
11828   }
11829 }
11830 
11831 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_varying_output_channels) {
11832   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11833   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
11834     DeconvolutionOperatorTester()
11835       .input_size(kStridedInputHeight, kStridedInputWidth)
11836       .kernel_size(2, 2)
11837       .stride(2)
11838       .groups(2)
11839       .group_input_channels(17)
11840       .group_output_channels(output_channels)
11841       .iterations(1)
11842       .TestF16();
11843   }
11844 }
11845 
11846 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_input_stride) {
11847   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11848   DeconvolutionOperatorTester()
11849     .input_size(kStridedInputHeight, kStridedInputWidth)
11850     .kernel_size(2, 2)
11851     .stride(2)
11852     .groups(2)
11853     .group_input_channels(17)
11854     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11855     .input_pixel_stride(37)
11856     .iterations(3)
11857     .TestF16();
11858 }
11859 
11860 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_output_stride) {
11861   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11862   DeconvolutionOperatorTester()
11863     .input_size(kStridedInputHeight, kStridedInputWidth)
11864     .kernel_size(2, 2)
11865     .stride(2)
11866     .groups(2)
11867     .group_input_channels(17)
11868     .group_output_channels(xnn_params.f16.gemm.nr + 3)
11869     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
11870     .iterations(3)
11871     .TestF16();
11872 }
11873 
11874 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_qmin) {
11875   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11876   DeconvolutionOperatorTester()
11877     .input_size(kStridedInputHeight, kStridedInputWidth)
11878     .kernel_size(2, 2)
11879     .stride(2)
11880     .groups(2)
11881     .group_input_channels(17)
11882     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11883     .qmin(128)
11884     .iterations(3)
11885     .TestF16();
11886 }
11887 
11888 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_with_qmax) {
11889   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11890   DeconvolutionOperatorTester()
11891     .input_size(kStridedInputHeight, kStridedInputWidth)
11892     .kernel_size(2, 2)
11893     .stride(2)
11894     .groups(2)
11895     .group_input_channels(17)
11896     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11897     .qmax(128)
11898     .iterations(3)
11899     .TestF16();
11900 }
11901 
11902 TEST(DECONVOLUTION_NHWC_F16, grouped_2x2s2_without_bias) {
11903   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11904   DeconvolutionOperatorTester()
11905     .has_bias(false)
11906     .input_size(kStridedInputHeight, kStridedInputWidth)
11907     .kernel_size(2, 2)
11908     .stride(2)
11909     .groups(2)
11910     .group_input_channels(17)
11911     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11912     .iterations(3)
11913     .TestF16();
11914 }
11915 
11916 TEST(DECONVOLUTION_NHWC_F16, weights_cache_grouped_2x2s2) {
11917   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11918   DeconvolutionOperatorTester()
11919     .input_size(kStridedInputHeight, kStridedInputWidth)
11920     .kernel_size(2, 2)
11921     .stride(2)
11922     .groups(2)
11923     .group_input_channels(17)
11924     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11925     .use_weights_cache(true)
11926     .iterations(3)
11927     .TestF16();
11928 }
11929 
11930 /**************************** SUBCONV2D/GEMM path, batched ****************************/
11931 
11932 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2) {
11933   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11934   DeconvolutionOperatorTester()
11935     .batch_size(2)
11936     .input_size(kStridedInputHeight, kStridedInputWidth)
11937     .kernel_size(2, 2)
11938     .stride(2)
11939     .group_input_channels(15)
11940     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11941     .iterations(3)
11942     .TestF16();
11943 }
11944 
11945 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_fp32_weights) {
11946   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11947   DeconvolutionOperatorTester()
11948     .batch_size(2)
11949     .input_size(kStridedInputHeight, kStridedInputWidth)
11950     .kernel_size(2, 2)
11951     .stride(2)
11952     .group_input_channels(15)
11953     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11954     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
11955     .iterations(3)
11956     .TestF16();
11957 }
11958 
11959 TEST(DECONVOLUTION_NHWC_F16, batched_Kx2sKx2) {
11960   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11961   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
11962     DeconvolutionOperatorTester()
11963       .batch_size(2)
11964       .input_size(kStridedInputHeight, kStridedInputWidth)
11965       .kernel_size(kernel_height, 2)
11966       .stride(kernel_height, 2)
11967       .group_input_channels(17)
11968       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11969       .iterations(3)
11970       .TestF16();
11971   }
11972 }
11973 
11974 TEST(DECONVOLUTION_NHWC_F16, batched_2xKs2xK) {
11975   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11976   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
11977     DeconvolutionOperatorTester()
11978       .batch_size(2)
11979       .input_size(kStridedInputHeight, kStridedInputWidth)
11980       .kernel_size(2, kernel_width)
11981       .stride(2, kernel_width)
11982       .group_input_channels(17)
11983       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11984       .iterations(3)
11985       .TestF16();
11986   }
11987 }
11988 
11989 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_height_adjustment) {
11990   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
11991   DeconvolutionOperatorTester()
11992     .batch_size(2)
11993     .input_size(kStridedInputHeight, kStridedInputWidth)
11994     .adjustment_height(1)
11995     .kernel_size(2, 2)
11996     .stride(2)
11997     .group_input_channels(15)
11998     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
11999     .iterations(1)
12000     .TestF16();
12001 }
12002 
12003 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_width_adjustment) {
12004   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12005   DeconvolutionOperatorTester()
12006     .batch_size(2)
12007     .input_size(kStridedInputHeight, kStridedInputWidth)
12008     .adjustment_width(1)
12009     .kernel_size(2, 2)
12010     .stride(2)
12011     .group_input_channels(15)
12012     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12013     .iterations(1)
12014     .TestF16();
12015 }
12016 
12017 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_input_height) {
12018   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12019   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
12020     DeconvolutionOperatorTester()
12021       .batch_size(2)
12022       .input_size(input_height, kStridedInputWidth)
12023       .kernel_size(2, 2)
12024       .stride(2)
12025       .group_input_channels(15)
12026       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12027       .iterations(1)
12028       .TestF16();
12029   }
12030 }
12031 
12032 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_input_width) {
12033   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12034   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
12035     DeconvolutionOperatorTester()
12036       .batch_size(2)
12037       .input_size(kStridedInputHeight, input_width)
12038       .kernel_size(2, 2)
12039       .stride(2)
12040       .group_input_channels(15)
12041       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12042       .iterations(1)
12043       .TestF16();
12044   }
12045 }
12046 
12047 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_input_channels) {
12048   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12049   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12050     DeconvolutionOperatorTester()
12051       .batch_size(2)
12052       .input_size(kStridedInputHeight, kStridedInputWidth)
12053       .kernel_size(2, 2)
12054       .stride(2)
12055       .group_input_channels(input_channels)
12056       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12057       .iterations(1)
12058       .TestF16();
12059   }
12060 }
12061 
12062 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_varying_output_channels) {
12063   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12064   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
12065     DeconvolutionOperatorTester()
12066       .batch_size(2)
12067       .input_size(kStridedInputHeight, kStridedInputWidth)
12068       .kernel_size(2, 2)
12069       .stride(2)
12070       .group_input_channels(23)
12071       .group_output_channels(output_channels)
12072       .iterations(1)
12073       .TestF16();
12074   }
12075 }
12076 
12077 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_input_stride) {
12078   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12079   DeconvolutionOperatorTester()
12080     .batch_size(2)
12081     .input_size(kStridedInputHeight, kStridedInputWidth)
12082     .kernel_size(2, 2)
12083     .stride(2)
12084     .group_input_channels(23)
12085     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12086     .input_pixel_stride(28)
12087     .iterations(3)
12088     .TestF16();
12089 }
12090 
12091 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_output_stride) {
12092   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12093   DeconvolutionOperatorTester()
12094     .batch_size(2)
12095     .input_size(kStridedInputHeight, kStridedInputWidth)
12096     .kernel_size(2, 2)
12097     .stride(2)
12098     .group_input_channels(23)
12099     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12100     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
12101     .iterations(3)
12102     .TestF16();
12103 }
12104 
12105 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_qmin) {
12106   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12107   DeconvolutionOperatorTester()
12108     .batch_size(2)
12109     .input_size(kStridedInputHeight, kStridedInputWidth)
12110     .kernel_size(2, 2)
12111     .stride(2)
12112     .group_input_channels(23)
12113     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12114     .qmin(128)
12115     .iterations(3)
12116     .TestF16();
12117 }
12118 
12119 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_with_qmax) {
12120   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12121   DeconvolutionOperatorTester()
12122     .batch_size(2)
12123     .input_size(kStridedInputHeight, kStridedInputWidth)
12124     .kernel_size(2, 2)
12125     .stride(2)
12126     .group_input_channels(23)
12127     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12128     .qmax(128)
12129     .iterations(3)
12130     .TestF16();
12131 }
12132 
12133 TEST(DECONVOLUTION_NHWC_F16, batched_2x2s2_without_bias) {
12134   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12135   DeconvolutionOperatorTester()
12136     .has_bias(false)
12137     .batch_size(2)
12138     .input_size(kStridedInputHeight, kStridedInputWidth)
12139     .kernel_size(2, 2)
12140     .stride(2)
12141     .group_input_channels(23)
12142     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12143     .iterations(3)
12144     .TestF16();
12145 }
12146 
12147 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_2x2s2) {
12148   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12149   DeconvolutionOperatorTester()
12150     .batch_size(2)
12151     .input_size(kStridedInputHeight, kStridedInputWidth)
12152     .kernel_size(2, 2)
12153     .stride(2)
12154     .group_input_channels(15)
12155     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12156     .use_weights_cache(true)
12157     .iterations(3)
12158     .TestF16();
12159 }
12160 
12161 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
12162 
12163 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2) {
12164   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12165   DeconvolutionOperatorTester()
12166     .batch_size(2)
12167     .input_size(kStridedInputHeight, kStridedInputWidth)
12168     .kernel_size(2, 2)
12169     .stride(2)
12170     .groups(2)
12171     .group_input_channels(17)
12172     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12173     .iterations(3)
12174     .TestF16();
12175 }
12176 
12177 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_fp32_weights) {
12178   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12179   DeconvolutionOperatorTester()
12180     .batch_size(2)
12181     .input_size(kStridedInputHeight, kStridedInputWidth)
12182     .kernel_size(2, 2)
12183     .stride(2)
12184     .groups(2)
12185     .group_input_channels(17)
12186     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12187     .weights_type(DeconvolutionOperatorTester::WeightsType::FP32)
12188     .iterations(3)
12189     .TestF16();
12190 }
12191 
12192 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_Kx2sKx2) {
12193   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12194   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
12195     DeconvolutionOperatorTester()
12196       .batch_size(2)
12197       .input_size(kStridedInputHeight, kStridedInputWidth)
12198       .kernel_size(kernel_height, 2)
12199       .stride(kernel_height, 2)
12200       .groups(2)
12201       .group_input_channels(17)
12202       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12203       .iterations(3)
12204       .TestF16();
12205   }
12206 }
12207 
12208 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2xKs2xK) {
12209   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12210   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
12211     DeconvolutionOperatorTester()
12212       .batch_size(2)
12213       .input_size(kStridedInputHeight, kStridedInputWidth)
12214       .kernel_size(2, kernel_width)
12215       .stride(2, kernel_width)
12216       .groups(2)
12217       .group_input_channels(17)
12218       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12219       .iterations(3)
12220       .TestF16();
12221   }
12222 }
12223 
12224 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_height_adjustment) {
12225   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12226   DeconvolutionOperatorTester()
12227     .batch_size(2)
12228     .input_size(kStridedInputHeight, kStridedInputWidth)
12229     .adjustment_height(1)
12230     .kernel_size(2, 2)
12231     .stride(2)
12232     .groups(2)
12233     .group_input_channels(17)
12234     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12235     .iterations(1)
12236     .TestF16();
12237 }
12238 
12239 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_width_adjustment) {
12240   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12241   DeconvolutionOperatorTester()
12242     .batch_size(2)
12243     .input_size(kStridedInputHeight, kStridedInputWidth)
12244     .adjustment_width(1)
12245     .kernel_size(2, 2)
12246     .stride(2)
12247     .groups(2)
12248     .group_input_channels(17)
12249     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12250     .iterations(1)
12251     .TestF16();
12252 }
12253 
12254 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_input_height) {
12255   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12256   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
12257     DeconvolutionOperatorTester()
12258       .batch_size(2)
12259       .input_size(input_height, kStridedInputWidth)
12260       .kernel_size(2, 2)
12261       .stride(2)
12262       .groups(2)
12263       .group_input_channels(17)
12264       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12265       .iterations(1)
12266       .TestF16();
12267   }
12268 }
12269 
12270 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_input_width) {
12271   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12272   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
12273     DeconvolutionOperatorTester()
12274       .batch_size(2)
12275       .input_size(kStridedInputHeight, input_width)
12276       .kernel_size(2, 2)
12277       .stride(2)
12278       .groups(2)
12279       .group_input_channels(17)
12280       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12281       .iterations(1)
12282       .TestF16();
12283   }
12284 }
12285 
12286 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_input_channels) {
12287   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12288   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
12289     DeconvolutionOperatorTester()
12290       .batch_size(2)
12291       .input_size(kStridedInputHeight, kStridedInputWidth)
12292       .kernel_size(2, 2)
12293       .stride(2)
12294       .groups(2)
12295       .group_input_channels(input_channels)
12296       .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12297       .iterations(1)
12298       .TestF16();
12299   }
12300 }
12301 
12302 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_varying_output_channels) {
12303   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12304   for (size_t output_channels = 1; output_channels <= xnn_params.f16.gemm.nr * 2; output_channels *= 2) {
12305     DeconvolutionOperatorTester()
12306       .batch_size(2)
12307       .input_size(kStridedInputHeight, kStridedInputWidth)
12308       .kernel_size(2, 2)
12309       .stride(2)
12310       .groups(2)
12311       .group_input_channels(17)
12312       .group_output_channels(output_channels)
12313       .iterations(1)
12314       .TestF16();
12315   }
12316 }
12317 
12318 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_input_stride) {
12319   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12320   DeconvolutionOperatorTester()
12321     .batch_size(2)
12322     .input_size(kStridedInputHeight, kStridedInputWidth)
12323     .kernel_size(2, 2)
12324     .stride(2)
12325     .groups(2)
12326     .group_input_channels(17)
12327     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12328     .input_pixel_stride(37)
12329     .iterations(3)
12330     .TestF16();
12331 }
12332 
12333 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_output_stride) {
12334   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12335   DeconvolutionOperatorTester()
12336     .batch_size(2)
12337     .input_size(kStridedInputHeight, kStridedInputWidth)
12338     .kernel_size(2, 2)
12339     .stride(2)
12340     .groups(2)
12341     .group_input_channels(17)
12342     .group_output_channels(xnn_params.f16.gemm.nr + 3)
12343     .output_pixel_stride(xnn_params.f16.gemm.nr * 2 + 13)
12344     .iterations(3)
12345     .TestF16();
12346 }
12347 
12348 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_qmin) {
12349   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12350   DeconvolutionOperatorTester()
12351     .batch_size(2)
12352     .input_size(kStridedInputHeight, kStridedInputWidth)
12353     .kernel_size(2, 2)
12354     .stride(2)
12355     .groups(2)
12356     .group_input_channels(17)
12357     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12358     .qmin(128)
12359     .iterations(3)
12360     .TestF16();
12361 }
12362 
12363 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_with_qmax) {
12364   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12365   DeconvolutionOperatorTester()
12366     .batch_size(2)
12367     .input_size(kStridedInputHeight, kStridedInputWidth)
12368     .kernel_size(2, 2)
12369     .stride(2)
12370     .groups(2)
12371     .group_input_channels(17)
12372     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12373     .qmax(128)
12374     .iterations(3)
12375     .TestF16();
12376 }
12377 
12378 TEST(DECONVOLUTION_NHWC_F16, batched_grouped_2x2s2_without_bias) {
12379   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12380   DeconvolutionOperatorTester()
12381     .has_bias(false)
12382     .batch_size(2)
12383     .input_size(kStridedInputHeight, kStridedInputWidth)
12384     .kernel_size(2, 2)
12385     .stride(2)
12386     .groups(2)
12387     .group_input_channels(17)
12388     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12389     .iterations(3)
12390     .TestF16();
12391 }
12392 
12393 TEST(DECONVOLUTION_NHWC_F16, weights_cache_batched_grouped_2x2s2) {
12394   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12395   DeconvolutionOperatorTester()
12396     .batch_size(2)
12397     .input_size(kStridedInputHeight, kStridedInputWidth)
12398     .kernel_size(2, 2)
12399     .stride(2)
12400     .groups(2)
12401     .group_input_channels(17)
12402     .group_output_channels(xnn_params.f16.gemm.nr * 2 + 3)
12403     .use_weights_cache(true)
12404     .iterations(3)
12405     .TestF16();
12406 }
12407 
12408 /**************************** SUBCONV2D/GEMM path, setup ****************************/
12409 
12410 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_setup_changing_batch) {
12411   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12412   DeconvolutionOperatorTester()
12413     .batch_size(2)
12414     .next_batch_size(5)
12415     .input_size(kStridedInputHeight, kStridedInputWidth)
12416     .kernel_size(2, 2)
12417     .stride(2)
12418     .groups(2)
12419     .group_input_channels(15)
12420     .group_output_channels(17)
12421     .TestSetupF16();
12422 }
12423 
12424 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_setup_changing_height) {
12425   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12426   DeconvolutionOperatorTester()
12427     .batch_size(2)
12428     .input_size(kStridedInputHeight, kStridedInputWidth)
12429     .next_input_height(kStridedInputHeight + 3)
12430     .kernel_size(2, 2)
12431     .stride(2)
12432     .groups(2)
12433     .group_input_channels(15)
12434     .group_output_channels(17)
12435     .TestSetupF16();
12436 }
12437 
12438 TEST(DECONVOLUTION_NHWC_F16, 2x2s2_setup_changing_width) {
12439   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12440   DeconvolutionOperatorTester()
12441     .batch_size(2)
12442     .input_size(kStridedInputHeight, kStridedInputWidth)
12443     .next_input_width(kStridedInputWidth + 3)
12444     .kernel_size(2, 2)
12445     .stride(2)
12446     .groups(2)
12447     .group_input_channels(15)
12448     .group_output_channels(17)
12449     .TestSetupF16();
12450 }
12451 
12452 /**************************** Future GEMM path ****************************/
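// F32 counterparts of the 1x1 cases above: unit-stride 1x1 deconvolutions, which are effectively
// a per-pixel matrix multiplication (hence the "GEMM path" label).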
12453 
12454 TEST(DECONVOLUTION_NHWC_F32, 1x1) {
12455   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12456   DeconvolutionOperatorTester()
12457     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12458     .kernel_size(1, 1)
12459     .group_input_channels(23)
12460     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12461     .iterations(3)
12462     .TestF32();
12463 }
12464 
12465 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_input_width) {
12466   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12467   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12468     DeconvolutionOperatorTester()
12469       .input_size(input_height, kUnstridedInputWidth)
12470       .kernel_size(1, 1)
12471       .group_input_channels(23)
12472       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12473       .iterations(1)
12474       .TestF32();
12475   }
12476 }
12477 
12478 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_input_height) {
12479   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12480   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12481     DeconvolutionOperatorTester()
12482       .input_size(kUnstridedInputHeight, input_width)
12483       .kernel_size(1, 1)
12484       .group_input_channels(23)
12485       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12486       .iterations(1)
12487       .TestF32();
12488   }
12489 }
12490 
12491 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_input_channels) {
12492   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12493   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12494     DeconvolutionOperatorTester()
12495       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12496       .kernel_size(1, 1)
12497       .group_input_channels(input_channels)
12498       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12499       .iterations(1)
12500       .TestF32();
12501   }
12502 }
12503 
12504 TEST(DECONVOLUTION_NHWC_F32, 1x1_varying_output_channels) {
12505   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12506   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12507     DeconvolutionOperatorTester()
12508       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12509       .kernel_size(1, 1)
12510       .group_input_channels(23)
12511       .group_output_channels(output_channels)
12512       .iterations(1)
12513       .TestF32();
12514   }
12515 }
12516 
12517 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_input_stride) {
12518   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12519   DeconvolutionOperatorTester()
12520     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12521     .kernel_size(1, 1)
12522     .group_input_channels(23)
12523     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12524     .input_pixel_stride(28)
12525     .iterations(3)
12526     .TestF32();
12527 }
12528 
12529 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_output_stride) {
12530   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12531   DeconvolutionOperatorTester()
12532     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12533     .kernel_size(1, 1)
12534     .group_input_channels(23)
12535     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12536     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12537     .iterations(3)
12538     .TestF32();
12539 }
12540 
12541 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_qmin) {
12542   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12543   DeconvolutionOperatorTester()
12544     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12545     .kernel_size(1, 1)
12546     .group_input_channels(23)
12547     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12548     .qmin(128)
12549     .iterations(3)
12550     .TestF32();
12551 }
12552 
12553 TEST(DECONVOLUTION_NHWC_F32, 1x1_with_qmax) {
12554   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12555   DeconvolutionOperatorTester()
12556     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12557     .kernel_size(1, 1)
12558     .group_input_channels(23)
12559     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12560     .qmax(128)
12561     .iterations(3)
12562     .TestF32();
12563 }
12564 
12565 TEST(DECONVOLUTION_NHWC_F32, 1x1_without_bias) {
12566   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12567   DeconvolutionOperatorTester()
12568     .has_bias(false)
12569     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12570     .kernel_size(1, 1)
12571     .group_input_channels(23)
12572     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12573     .iterations(3)
12574     .TestF32();
12575 }
12576 
12577 /**************************** Future GEMM path, grouped ****************************/
12578 
12579 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1) {
12580   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12581   DeconvolutionOperatorTester()
12582     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12583     .kernel_size(1, 1)
12584     .groups(2)
12585     .group_input_channels(23)
12586     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12587     .iterations(3)
12588     .TestF32();
12589 }
12590 
12591 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_input_width) {
12592   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12593   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12594     DeconvolutionOperatorTester()
12595       .input_size(input_height, kUnstridedInputWidth)
12596       .kernel_size(1, 1)
12597       .groups(2)
12598       .group_input_channels(23)
12599       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12600       .iterations(1)
12601       .TestF32();
12602   }
12603 }
12604 
12605 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_input_height) {
12606   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12607   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12608     DeconvolutionOperatorTester()
12609       .input_size(kUnstridedInputHeight, input_width)
12610       .kernel_size(1, 1)
12611       .groups(2)
12612       .group_input_channels(23)
12613       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12614       .iterations(1)
12615       .TestF32();
12616   }
12617 }
12618 
12619 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_input_channels) {
12620   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12621   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12622     DeconvolutionOperatorTester()
12623       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12624       .kernel_size(1, 1)
12625       .groups(2)
12626       .group_input_channels(input_channels)
12627       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12628       .iterations(1)
12629       .TestF32();
12630   }
12631 }
12632 
12633 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_varying_output_channels) {
12634   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12635   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12636     DeconvolutionOperatorTester()
12637       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12638       .kernel_size(1, 1)
12639       .groups(2)
12640       .group_input_channels(23)
12641       .group_output_channels(output_channels)
12642       .iterations(1)
12643       .TestF32();
12644   }
12645 }
12646 
12647 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_input_stride) {
12648   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12649   DeconvolutionOperatorTester()
12650     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12651     .kernel_size(1, 1)
12652     .groups(2)
12653     .group_input_channels(23)
12654     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12655     .input_pixel_stride(47)
12656     .iterations(3)
12657     .TestF32();
12658 }
12659 
12660 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_output_stride) {
12661   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12662   DeconvolutionOperatorTester()
12663     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12664     .kernel_size(1, 1)
12665     .groups(2)
12666     .group_input_channels(23)
12667     .group_output_channels(xnn_params.f32.gemm.nr + 3)
12668     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12669     .iterations(3)
12670     .TestF32();
12671 }
12672 
12673 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_qmin) {
12674   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12675   DeconvolutionOperatorTester()
12676     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12677     .kernel_size(1, 1)
12678     .groups(2)
12679     .group_input_channels(23)
12680     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12681     .qmin(128)
12682     .iterations(3)
12683     .TestF32();
12684 }
12685 
12686 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_with_qmax) {
12687   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12688   DeconvolutionOperatorTester()
12689     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12690     .kernel_size(1, 1)
12691     .groups(2)
12692     .group_input_channels(23)
12693     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12694     .qmax(128)
12695     .iterations(3)
12696     .TestF32();
12697 }
12698 
12699 TEST(DECONVOLUTION_NHWC_F32, grouped_1x1_without_bias) {
12700   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12701   DeconvolutionOperatorTester()
12702     .has_bias(false)
12703     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12704     .kernel_size(1, 1)
12705     .groups(2)
12706     .group_input_channels(23)
12707     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12708     .iterations(3)
12709     .TestF32();
12710 }
12711 
12712 /**************************** Future GEMM path, batched ****************************/
12713 
12714 TEST(DECONVOLUTION_NHWC_F32, batched_1x1) {
12715   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12716   DeconvolutionOperatorTester()
12717     .batch_size(2)
12718     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12719     .kernel_size(1, 1)
12720     .group_input_channels(23)
12721     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12722     .iterations(3)
12723     .TestF32();
12724 }
12725 
12726 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_input_width) {
12727   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12728   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12729     DeconvolutionOperatorTester()
12730       .batch_size(2)
12731       .input_size(input_height, kUnstridedInputWidth)
12732       .kernel_size(1, 1)
12733       .group_input_channels(23)
12734       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12735       .iterations(1)
12736       .TestF32();
12737   }
12738 }
12739 
12740 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_input_height) {
12741   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12742   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12743     DeconvolutionOperatorTester()
12744       .batch_size(2)
12745       .input_size(kUnstridedInputHeight, input_width)
12746       .kernel_size(1, 1)
12747       .group_input_channels(23)
12748       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12749       .iterations(1)
12750       .TestF32();
12751   }
12752 }
12753 
12754 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_input_channels) {
12755   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12756   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12757     DeconvolutionOperatorTester()
12758       .batch_size(2)
12759       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12760       .kernel_size(1, 1)
12761       .group_input_channels(input_channels)
12762       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12763       .iterations(1)
12764       .TestF32();
12765   }
12766 }
12767 
12768 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_varying_output_channels) {
12769   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12770   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12771     DeconvolutionOperatorTester()
12772       .batch_size(2)
12773       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12774       .kernel_size(1, 1)
12775       .group_input_channels(23)
12776       .group_output_channels(output_channels)
12777       .iterations(1)
12778       .TestF32();
12779   }
12780 }
12781 
12782 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_input_stride) {
12783   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12784   DeconvolutionOperatorTester()
12785     .batch_size(2)
12786     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12787     .kernel_size(1, 1)
12788     .group_input_channels(23)
12789     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12790     .input_pixel_stride(28)
12791     .iterations(3)
12792     .TestF32();
12793 }
12794 
12795 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_output_stride) {
12796   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12797   DeconvolutionOperatorTester()
12798     .batch_size(2)
12799     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12800     .kernel_size(1, 1)
12801     .group_input_channels(23)
12802     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12803     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12804     .iterations(3)
12805     .TestF32();
12806 }
12807 
12808 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_qmin) {
12809   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12810   DeconvolutionOperatorTester()
12811     .batch_size(2)
12812     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12813     .kernel_size(1, 1)
12814     .group_input_channels(23)
12815     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12816     .qmin(128)
12817     .iterations(3)
12818     .TestF32();
12819 }
12820 
12821 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_with_qmax) {
12822   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12823   DeconvolutionOperatorTester()
12824     .batch_size(2)
12825     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12826     .kernel_size(1, 1)
12827     .group_input_channels(23)
12828     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12829     .qmax(128)
12830     .iterations(3)
12831     .TestF32();
12832 }
12833 
12834 TEST(DECONVOLUTION_NHWC_F32, batched_1x1_without_bias) {
12835   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12836   DeconvolutionOperatorTester()
12837     .has_bias(false)
12838     .batch_size(2)
12839     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12840     .kernel_size(1, 1)
12841     .group_input_channels(23)
12842     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12843     .iterations(3)
12844     .TestF32();
12845 }
12846 
12847 /**************************** Future GEMM path, batched, grouped ****************************/
12848 
12849 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1) {
12850   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12851   DeconvolutionOperatorTester()
12852     .batch_size(2)
12853     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12854     .kernel_size(1, 1)
12855     .groups(2)
12856     .group_input_channels(23)
12857     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12858     .iterations(3)
12859     .TestF32();
12860 }
12861 
12862 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_input_width) {
12863   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12864   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
12865     DeconvolutionOperatorTester()
12866       .batch_size(2)
12867       .input_size(input_height, kUnstridedInputWidth)
12868       .kernel_size(1, 1)
12869       .groups(2)
12870       .group_input_channels(23)
12871       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12872       .iterations(1)
12873       .TestF32();
12874   }
12875 }
12876 
12877 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_input_height) {
12878   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12879   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
12880     DeconvolutionOperatorTester()
12881       .batch_size(2)
12882       .input_size(kUnstridedInputHeight, input_width)
12883       .kernel_size(1, 1)
12884       .groups(2)
12885       .group_input_channels(23)
12886       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12887       .iterations(1)
12888       .TestF32();
12889   }
12890 }
12891 
12892 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_input_channels) {
12893   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12894   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
12895     DeconvolutionOperatorTester()
12896       .batch_size(2)
12897       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12898       .kernel_size(1, 1)
12899       .groups(2)
12900       .group_input_channels(input_channels)
12901       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12902       .iterations(1)
12903       .TestF32();
12904   }
12905 }
12906 
12907 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_varying_output_channels) {
12908   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12909   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
12910     DeconvolutionOperatorTester()
12911       .batch_size(2)
12912       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12913       .kernel_size(1, 1)
12914       .groups(2)
12915       .group_input_channels(23)
12916       .group_output_channels(output_channels)
12917       .iterations(1)
12918       .TestF32();
12919   }
12920 }
12921 
12922 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_input_stride) {
12923   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12924   DeconvolutionOperatorTester()
12925     .batch_size(2)
12926     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12927     .kernel_size(1, 1)
12928     .groups(2)
12929     .group_input_channels(23)
12930     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12931     .input_pixel_stride(47)
12932     .iterations(3)
12933     .TestF32();
12934 }
12935 
12936 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_output_stride) {
12937   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12938   DeconvolutionOperatorTester()
12939     .batch_size(2)
12940     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12941     .kernel_size(1, 1)
12942     .groups(2)
12943     .group_input_channels(23)
12944     .group_output_channels(xnn_params.f32.gemm.nr + 3)
12945     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
12946     .iterations(3)
12947     .TestF32();
12948 }
12949 
12950 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_qmin) {
12951   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12952   DeconvolutionOperatorTester()
12953     .batch_size(2)
12954     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12955     .kernel_size(1, 1)
12956     .groups(2)
12957     .group_input_channels(23)
12958     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12959     .qmin(128)
12960     .iterations(3)
12961     .TestF32();
12962 }
12963 
12964 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_with_qmax) {
12965   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12966   DeconvolutionOperatorTester()
12967     .batch_size(2)
12968     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12969     .kernel_size(1, 1)
12970     .groups(2)
12971     .group_input_channels(23)
12972     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12973     .qmax(128)
12974     .iterations(3)
12975     .TestF32();
12976 }
12977 
12978 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_1x1_without_bias) {
12979   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12980   DeconvolutionOperatorTester()
12981     .has_bias(false)
12982     .batch_size(2)
12983     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12984     .kernel_size(1, 1)
12985     .groups(2)
12986     .group_input_channels(23)
12987     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
12988     .iterations(3)
12989     .TestF32();
12990 }
12991 
12992 /**************************** CONV path ****************************/
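// These cases use unit-stride kernels larger than 1x1 (3x3, Kx3, 3xK) with padding, dilation,
// and output adjustment. For reference, the expected output extent per spatial dimension
// follows the usual transposed-convolution relation (a sketch, assuming the standard definition):
//   output = (input - 1) * stride + (kernel - 1) * dilation + 1 - (padding_low + padding_high) + adjustment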
12993 
12994 TEST(DECONVOLUTION_NHWC_F32, 3x3) {
12995   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
12996   DeconvolutionOperatorTester()
12997     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
12998     .padding(1)
12999     .kernel_size(3, 3)
13000     .group_input_channels(15)
13001     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13002     .iterations(3)
13003     .TestF32();
13004 }
13005 
13006 TEST(DECONVOLUTION_NHWC_F32, Kx3) {
13007   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13008   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13009     DeconvolutionOperatorTester()
13010       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13011       .padding_width(1)
13012       .kernel_size(kernel_height, 3)
13013       .group_input_channels(17)
13014       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13015       .iterations(3)
13016       .TestF32();
13017   }
13018 }
13019 
13020 TEST(DECONVOLUTION_NHWC_F32, 3xK) {
13021   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13022   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13023     DeconvolutionOperatorTester()
13024       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13025       .padding_height(1)
13026       .kernel_size(3, kernel_width)
13027       .group_input_channels(17)
13028       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13029       .iterations(3)
13030       .TestF32();
13031   }
13032 }
13033 
13034 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_height_padding) {
13035   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13036   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13037     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13038       DeconvolutionOperatorTester()
13039         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13040         .padding_width(1)
13041         .padding_top(padding_top)
13042         .padding_bottom(padding_bottom)
13043         .kernel_size(3, 3)
13044         .group_input_channels(15)
13045         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13046         .iterations(1)
13047         .TestF32();
13048     }
13049   }
13050 }
13051 
13052 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_width_padding) {
13053   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13054   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
13055     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
13056       DeconvolutionOperatorTester()
13057         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13058         .padding_height(1)
13059         .padding_left(padding_left)
13060         .padding_right(padding_right)
13061         .kernel_size(3, 3)
13062         .group_input_channels(15)
13063         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13064         .iterations(1)
13065         .TestF32();
13066     }
13067   }
13068 }
13069 
13070 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_height_adjustment) {
13071   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13072   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
13073     DeconvolutionOperatorTester()
13074       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13075       .padding(1)
13076       .stride_height(adjustment_height + 1)
13077       .adjustment_height(adjustment_height)
13078       .kernel_size(3, 3)
13079       .group_input_channels(15)
13080       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13081       .iterations(1)
13082       .TestF32();
13083   }
13084 }
13085 
13086 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_width_adjustment) {
13087   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13088   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
13089     DeconvolutionOperatorTester()
13090       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13091       .padding(1)
13092       .stride_width(adjustment_width + 1)
13093       .adjustment_width(adjustment_width)
13094       .kernel_size(3, 3)
13095       .group_input_channels(15)
13096       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13097       .iterations(1)
13098       .TestF32();
13099   }
13100 }
13101 
13102 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_input_height) {
13103   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13104   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
13105     DeconvolutionOperatorTester()
13106       .input_size(input_height, kUnstridedInputWidth)
13107       .padding(1)
13108       .kernel_size(3, 3)
13109       .group_input_channels(15)
13110       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13111       .iterations(1)
13112       .TestF32();
13113   }
13114 }
13115 
13116 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_input_width) {
13117   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13118   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
13119     DeconvolutionOperatorTester()
13120       .input_size(kUnstridedInputHeight, input_width)
13121       .padding(1)
13122       .kernel_size(3, 3)
13123       .group_input_channels(15)
13124       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13125       .iterations(1)
13126       .TestF32();
13127   }
13128 }
13129 
13130 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_input_channels) {
13131   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13132   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
13133     DeconvolutionOperatorTester()
13134       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13135       .padding(1)
13136       .kernel_size(3, 3)
13137       .group_input_channels(input_channels)
13138       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13139       .iterations(1)
13140       .TestF32();
13141   }
13142 }
13143 
13144 TEST(DECONVOLUTION_NHWC_F32, 3x3_varying_output_channels) {
13145   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13146   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
13147     DeconvolutionOperatorTester()
13148       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13149       .padding(1)
13150       .kernel_size(3, 3)
13151       .group_input_channels(23)
13152       .group_output_channels(output_channels)
13153       .iterations(1)
13154       .TestF32();
13155   }
13156 }
13157 
13158 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_height_dilation) {
13159   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13160   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
13161     DeconvolutionOperatorTester()
13162       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13163       .padding(1)
13164       .kernel_size(3, 3)
13165       .dilation_height(dilation_height)
13166       .group_input_channels(23)
13167       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13168       .iterations(3)
13169       .TestF32();
13170   }
13171 }
13172 
13173 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_width_dilation) {
13174   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13175   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
13176     DeconvolutionOperatorTester()
13177       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13178       .padding(1)
13179       .kernel_size(3, 3)
13180       .dilation_width(dilation_width)
13181       .group_input_channels(23)
13182       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13183       .iterations(3)
13184       .TestF32();
13185   }
13186 }
13187 
13188 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_height_dilation_and_stride) {
13189   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13190   DeconvolutionOperatorTester()
13191     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13192     .padding(1)
13193     .kernel_size(3, 3)
13194     .dilation_height(3)
13195     .stride_height(2)
13196     .group_input_channels(23)
13197     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13198     .iterations(3)
13199     .TestF32();
13200 }
13201 
13202 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_width_dilation_and_stride) {
13203   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13204   DeconvolutionOperatorTester()
13205     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13206     .padding(1)
13207     .kernel_size(3, 3)
13208     .dilation_width(3)
13209     .stride_width(2)
13210     .group_input_channels(23)
13211     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13212     .iterations(3)
13213     .TestF32();
13214 }
13215 
13216 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_input_stride) {
13217   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13218   DeconvolutionOperatorTester()
13219     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13220     .padding(1)
13221     .kernel_size(3, 3)
13222     .group_input_channels(23)
13223     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13224     .input_pixel_stride(28)
13225     .iterations(3)
13226     .TestF32();
13227 }
13228 
13229 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_output_stride) {
13230   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13231   DeconvolutionOperatorTester()
13232     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13233     .padding(1)
13234     .kernel_size(3, 3)
13235     .group_input_channels(23)
13236     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13237     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
13238     .iterations(3)
13239     .TestF32();
13240 }
13241 
13242 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_qmin) {
13243   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13244   DeconvolutionOperatorTester()
13245     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13246     .padding(1)
13247     .kernel_size(3, 3)
13248     .group_input_channels(23)
13249     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13250     .qmin(128)
13251     .iterations(3)
13252     .TestF32();
13253 }
13254 
13255 TEST(DECONVOLUTION_NHWC_F32, 3x3_with_qmax) {
13256   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13257   DeconvolutionOperatorTester()
13258     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13259     .padding(1)
13260     .kernel_size(3, 3)
13261     .group_input_channels(23)
13262     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13263     .qmax(128)
13264     .iterations(3)
13265     .TestF32();
13266 }
13267 
13268 TEST(DECONVOLUTION_NHWC_F32, 3x3_without_bias) {
13269   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13270   DeconvolutionOperatorTester()
13271     .has_bias(false)
13272     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13273     .padding(1)
13274     .kernel_size(3, 3)
13275     .group_input_channels(23)
13276     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13277     .iterations(3)
13278     .TestF32();
13279 }
13280 
13281 TEST(DECONVOLUTION_NHWC_F32, weights_cache_3x3) {
13282   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13283   DeconvolutionOperatorTester()
13284     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13285     .padding(1)
13286     .kernel_size(3, 3)
13287     .group_input_channels(15)
13288     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13289     .use_weights_cache(true)
13290     .iterations(3)
13291     .TestF32();
13292 }
13293 
13294 /**************************** CONV path, grouped ****************************/
13295 
13296 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3) {
13297   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13298   DeconvolutionOperatorTester()
13299     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13300     .padding(1)
13301     .kernel_size(3, 3)
13302     .groups(2)
13303     .group_input_channels(15)
13304     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13305     .iterations(3)
13306     .TestF32();
13307 }
13308 
13309 TEST(DECONVOLUTION_NHWC_F32, grouped_Kx3) {
13310   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13311   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13312     DeconvolutionOperatorTester()
13313       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13314       .padding_width(1)
13315       .kernel_size(kernel_height, 3)
13316       .groups(2)
13317       .group_input_channels(17)
13318       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13319       .iterations(3)
13320       .TestF32();
13321   }
13322 }
13323 
13324 TEST(DECONVOLUTION_NHWC_F32, grouped_3xK) {
13325   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13326   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13327     DeconvolutionOperatorTester()
13328       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13329       .padding_height(1)
13330       .kernel_size(3, kernel_width)
13331       .groups(2)
13332       .group_input_channels(17)
13333       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13334       .iterations(3)
13335       .TestF32();
13336   }
13337 }
13338 
13339 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_height_padding) {
13340   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13341   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13342     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13343       DeconvolutionOperatorTester()
13344         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13345         .padding_width(1)
13346         .padding_top(padding_top)
13347         .padding_bottom(padding_bottom)
13348         .kernel_size(3, 3)
13349         .groups(2)
13350         .group_input_channels(15)
13351         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13352         .iterations(1)
13353         .TestF32();
13354     }
13355   }
13356 }
13357 
13358 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_width_padding) {
13359   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13360   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
13361     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
13362       DeconvolutionOperatorTester()
13363         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13364         .padding_height(1)
13365         .padding_left(padding_left)
13366         .padding_right(padding_right)
13367         .kernel_size(3, 3)
13368         .groups(2)
13369         .group_input_channels(15)
13370         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13371         .iterations(1)
13372         .TestF32();
13373     }
13374   }
13375 }
13376 
13377 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_height_adjustment) {
13378   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13379   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
13380     DeconvolutionOperatorTester()
13381       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13382       .padding(1)
13383       .stride_height(adjustment_height + 1)
13384       .adjustment_height(adjustment_height)
13385       .kernel_size(3, 3)
13386       .groups(2)
13387       .group_input_channels(15)
13388       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13389       .iterations(1)
13390       .TestF32();
13391   }
13392 }
13393 
13394 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_width_adjustment) {
13395   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13396   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
13397     DeconvolutionOperatorTester()
13398       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13399       .padding(1)
13400       .stride_width(adjustment_width + 1)
13401       .adjustment_width(adjustment_width)
13402       .kernel_size(3, 3)
13403       .groups(2)
13404       .group_input_channels(15)
13405       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13406       .iterations(1)
13407       .TestF32();
13408   }
13409 }
13410 
13411 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_input_height) {
13412   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13413   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
13414     DeconvolutionOperatorTester()
13415       .input_size(input_height, kUnstridedInputWidth)
13416       .padding(1)
13417       .kernel_size(3, 3)
13418       .groups(2)
13419       .group_input_channels(15)
13420       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13421       .iterations(1)
13422       .TestF32();
13423   }
13424 }
13425 
13426 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_input_width) {
13427   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13428   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
13429     DeconvolutionOperatorTester()
13430       .input_size(kUnstridedInputHeight, input_width)
13431       .padding(1)
13432       .kernel_size(3, 3)
13433       .groups(2)
13434       .group_input_channels(15)
13435       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13436       .iterations(1)
13437       .TestF32();
13438   }
13439 }
13440 
13441 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_input_channels) {
13442   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13443   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
13444     DeconvolutionOperatorTester()
13445       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13446       .padding(1)
13447       .kernel_size(3, 3)
13448       .groups(2)
13449       .group_input_channels(input_channels)
13450       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13451       .iterations(1)
13452       .TestF32();
13453   }
13454 }
13455 
13456 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_varying_output_channels) {
13457   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13458   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
13459     DeconvolutionOperatorTester()
13460       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13461       .padding(1)
13462       .kernel_size(3, 3)
13463       .groups(2)
13464       .group_input_channels(23)
13465       .group_output_channels(output_channels)
13466       .iterations(1)
13467       .TestF32();
13468   }
13469 }
13470 
13471 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_height_dilation) {
13472   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13473   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
13474     DeconvolutionOperatorTester()
13475       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13476       .padding(1)
13477       .kernel_size(3, 3)
13478       .dilation_height(dilation_height)
13479       .groups(2)
13480       .group_input_channels(23)
13481       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13482       .iterations(3)
13483       .TestF32();
13484   }
13485 }
13486 
13487 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_width_dilation) {
13488   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13489   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
13490     DeconvolutionOperatorTester()
13491       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13492       .padding(1)
13493       .kernel_size(3, 3)
13494       .dilation_width(dilation_width)
13495       .groups(2)
13496       .group_input_channels(23)
13497       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13498       .iterations(3)
13499       .TestF32();
13500   }
13501 }
13502 
13503 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_height_dilation_and_stride) {
13504   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13505   DeconvolutionOperatorTester()
13506     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13507     .padding(1)
13508     .kernel_size(3, 3)
13509     .dilation_height(3)
13510     .stride_height(2)
13511     .groups(2)
13512     .group_input_channels(23)
13513     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13514     .iterations(3)
13515     .TestF32();
13516 }
13517 
13518 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_width_dilation_and_stride) {
13519   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13520   DeconvolutionOperatorTester()
13521     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13522     .padding(1)
13523     .kernel_size(3, 3)
13524     .dilation_width(3)
13525     .stride_width(2)
13526     .groups(2)
13527     .group_input_channels(23)
13528     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13529     .iterations(3)
13530     .TestF32();
13531 }
13532 
13533 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_input_stride) {
13534   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13535   DeconvolutionOperatorTester()
13536     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13537     .padding(1)
13538     .kernel_size(3, 3)
13539     .groups(2)
13540     .group_input_channels(23)
13541     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13542     .input_pixel_stride(47)
13543     .iterations(3)
13544     .TestF32();
13545 }
13546 
13547 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_output_stride) {
13548   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13549   DeconvolutionOperatorTester()
13550     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13551     .padding(1)
13552     .kernel_size(3, 3)
13553     .groups(2)
13554     .group_input_channels(23)
13555     .group_output_channels(xnn_params.f32.gemm.nr + 3)
13556     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
13557     .iterations(3)
13558     .TestF32();
13559 }
13560 
13561 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_qmin) {
13562   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13563   DeconvolutionOperatorTester()
13564     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13565     .padding(1)
13566     .kernel_size(3, 3)
13567     .groups(2)
13568     .group_input_channels(23)
13569     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13570     .qmin(128)
13571     .iterations(3)
13572     .TestF32();
13573 }
13574 
13575 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_with_qmax) {
13576   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13577   DeconvolutionOperatorTester()
13578     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13579     .padding(1)
13580     .kernel_size(3, 3)
13581     .groups(2)
13582     .group_input_channels(23)
13583     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13584     .qmax(128)
13585     .iterations(3)
13586     .TestF32();
13587 }
13588 
13589 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3_without_bias) {
13590   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13591   DeconvolutionOperatorTester()
13592     .has_bias(false)
13593     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13594     .padding(1)
13595     .kernel_size(3, 3)
13596     .groups(2)
13597     .group_input_channels(23)
13598     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13599     .iterations(3)
13600     .TestF32();
13601 }
13602 
13603 TEST(DECONVOLUTION_NHWC_F32, weights_cache_grouped_3x3) {
13604   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13605   DeconvolutionOperatorTester()
13606     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13607     .padding(1)
13608     .kernel_size(3, 3)
13609     .groups(2)
13610     .group_input_channels(15)
13611     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13612     .use_weights_cache(true)
13613     .iterations(3)
13614     .TestF32();
13615 }
13616 
13617 /**************************** CONV path, batched ****************************/
13618 
13619 TEST(DECONVOLUTION_NHWC_F32, batched_3x3) {
13620   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13621   DeconvolutionOperatorTester()
13622     .batch_size(2)
13623     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13624     .padding(1)
13625     .kernel_size(3, 3)
13626     .group_input_channels(15)
13627     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13628     .iterations(3)
13629     .TestF32();
13630 }
13631 
13632 TEST(DECONVOLUTION_NHWC_F32, batched_Kx3) {
13633   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13634   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13635     DeconvolutionOperatorTester()
13636       .batch_size(2)
13637       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13638       .padding_width(1)
13639       .kernel_size(kernel_height, 3)
13640       .group_input_channels(17)
13641       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13642       .iterations(3)
13643       .TestF32();
13644   }
13645 }
13646 
13647 TEST(DECONVOLUTION_NHWC_F32, batched_3xK) {
13648   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13649   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13650     DeconvolutionOperatorTester()
13651       .batch_size(2)
13652       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13653       .padding_height(1)
13654       .kernel_size(3, kernel_width)
13655       .group_input_channels(17)
13656       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13657       .iterations(3)
13658       .TestF32();
13659   }
13660 }
13661 
13662 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_height_padding) {
13663   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13664   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13665     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13666       DeconvolutionOperatorTester()
13667         .batch_size(2)
13668         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13669         .padding_width(1)
13670         .padding_top(padding_top)
13671         .padding_bottom(padding_bottom)
13672         .kernel_size(3, 3)
13673         .group_input_channels(15)
13674         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13675         .iterations(1)
13676         .TestF32();
13677     }
13678   }
13679 }
13680 
13681 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_width_padding) {
13682   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13683   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
13684     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
13685       DeconvolutionOperatorTester()
13686         .batch_size(2)
13687         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13688         .padding_height(1)
13689         .padding_left(padding_left)
13690         .padding_right(padding_right)
13691         .kernel_size(3, 3)
13692         .group_input_channels(15)
13693         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13694         .iterations(1)
13695         .TestF32();
13696     }
13697   }
13698 }
13699 
13700 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_height_adjustment) {
13701   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13702   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
13703     DeconvolutionOperatorTester()
13704       .batch_size(2)
13705       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13706       .padding(1)
13707       .stride_height(adjustment_height + 1)
13708       .adjustment_height(adjustment_height)
13709       .kernel_size(3, 3)
13710       .group_input_channels(15)
13711       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13712       .iterations(1)
13713       .TestF32();
13714   }
13715 }
13716 
13717 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_width_adjustment) {
13718   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13719   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
13720     DeconvolutionOperatorTester()
13721       .batch_size(2)
13722       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13723       .padding(1)
13724       .stride_width(adjustment_width + 1)
13725       .adjustment_width(adjustment_width)
13726       .kernel_size(3, 3)
13727       .group_input_channels(15)
13728       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13729       .iterations(1)
13730       .TestF32();
13731   }
13732 }
13733 
13734 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_input_height) {
13735   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13736   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
13737     DeconvolutionOperatorTester()
13738       .batch_size(2)
13739       .input_size(input_height, kUnstridedInputWidth)
13740       .padding(1)
13741       .kernel_size(3, 3)
13742       .group_input_channels(15)
13743       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13744       .iterations(1)
13745       .TestF32();
13746   }
13747 }
13748 
13749 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_input_width) {
13750   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13751   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
13752     DeconvolutionOperatorTester()
13753       .batch_size(2)
13754       .input_size(kUnstridedInputHeight, input_width)
13755       .padding(1)
13756       .kernel_size(3, 3)
13757       .group_input_channels(15)
13758       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13759       .iterations(1)
13760       .TestF32();
13761   }
13762 }
13763 
13764 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_input_channels) {
13765   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13766   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
13767     DeconvolutionOperatorTester()
13768       .batch_size(2)
13769       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13770       .padding(1)
13771       .kernel_size(3, 3)
13772       .group_input_channels(input_channels)
13773       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13774       .iterations(1)
13775       .TestF32();
13776   }
13777 }
13778 
13779 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_varying_output_channels) {
13780   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13781   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
13782     DeconvolutionOperatorTester()
13783       .batch_size(2)
13784       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13785       .padding(1)
13786       .kernel_size(3, 3)
13787       .group_input_channels(23)
13788       .group_output_channels(output_channels)
13789       .iterations(1)
13790       .TestF32();
13791   }
13792 }
13793 
13794 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_height_dilation) {
13795   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13796   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
13797     DeconvolutionOperatorTester()
13798       .batch_size(2)
13799       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13800       .padding(1)
13801       .kernel_size(3, 3)
13802       .dilation_height(dilation_height)
13803       .group_input_channels(23)
13804       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13805       .iterations(3)
13806       .TestF32();
13807   }
13808 }
13809 
13810 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_width_dilation) {
13811   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13812   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
13813     DeconvolutionOperatorTester()
13814       .batch_size(2)
13815       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13816       .padding(1)
13817       .kernel_size(3, 3)
13818       .dilation_width(dilation_width)
13819       .group_input_channels(23)
13820       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13821       .iterations(3)
13822       .TestF32();
13823   }
13824 }
13825 
13826 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_height_dilation_and_stride) {
13827   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13828   DeconvolutionOperatorTester()
13829     .batch_size(2)
13830     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13831     .padding(1)
13832     .kernel_size(3, 3)
13833     .dilation_height(3)
13834     .stride_height(2)
13835     .group_input_channels(23)
13836     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13837     .iterations(3)
13838     .TestF32();
13839 }
13840 
13841 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_width_dilation_and_stride) {
13842   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13843   DeconvolutionOperatorTester()
13844     .batch_size(2)
13845     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13846     .padding(1)
13847     .kernel_size(3, 3)
13848     .dilation_width(3)
13849     .stride_width(2)
13850     .group_input_channels(23)
13851     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13852     .iterations(3)
13853     .TestF32();
13854 }
13855 
13856 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_input_stride) {
13857   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13858   DeconvolutionOperatorTester()
13859     .batch_size(2)
13860     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13861     .padding(1)
13862     .kernel_size(3, 3)
13863     .group_input_channels(23)
13864     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13865     .input_pixel_stride(28)
13866     .iterations(3)
13867     .TestF32();
13868 }
13869 
13870 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_output_stride) {
13871   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13872   DeconvolutionOperatorTester()
13873     .batch_size(2)
13874     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13875     .padding(1)
13876     .kernel_size(3, 3)
13877     .group_input_channels(23)
13878     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13879     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
13880     .iterations(3)
13881     .TestF32();
13882 }
13883 
13884 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_qmin) {
13885   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13886   DeconvolutionOperatorTester()
13887     .batch_size(2)
13888     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13889     .padding(1)
13890     .kernel_size(3, 3)
13891     .group_input_channels(23)
13892     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13893     .qmin(128)
13894     .iterations(3)
13895     .TestF32();
13896 }
13897 
13898 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_with_qmax) {
13899   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13900   DeconvolutionOperatorTester()
13901     .batch_size(2)
13902     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13903     .padding(1)
13904     .kernel_size(3, 3)
13905     .group_input_channels(23)
13906     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13907     .qmax(128)
13908     .iterations(3)
13909     .TestF32();
13910 }
13911 
13912 TEST(DECONVOLUTION_NHWC_F32, batched_3x3_without_bias) {
13913   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13914   DeconvolutionOperatorTester()
13915     .has_bias(false)
13916     .batch_size(2)
13917     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13918     .padding(1)
13919     .kernel_size(3, 3)
13920     .group_input_channels(23)
13921     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13922     .iterations(3)
13923     .TestF32();
13924 }
13925 
13926 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_3x3) {
13927   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13928   DeconvolutionOperatorTester()
13929     .batch_size(2)
13930     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13931     .padding(1)
13932     .kernel_size(3, 3)
13933     .group_input_channels(15)
13934     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13935     .use_weights_cache(true)
13936     .iterations(3)
13937     .TestF32();
13938 }
13939 
13940 
13941 /**************************** CONV path, grouped, batched ****************************/
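// The batched, grouped tests below repeat the unit-stride 3x3 coverage with batch_size(2) and
// groups(2), sweeping kernel shape, padding, output adjustment, input size, channel counts,
// dilation, pixel strides, output clamping, bias, and the weights cache.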
13942 
13943 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3) {
13944   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13945   DeconvolutionOperatorTester()
13946     .batch_size(2)
13947     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13948     .padding(1)
13949     .kernel_size(3, 3)
13950     .groups(2)
13951     .group_input_channels(15)
13952     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13953     .iterations(3)
13954     .TestF32();
13955 }
13956 
13957 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_Kx3) {
13958   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13959   for (size_t kernel_height = 1; kernel_height <= 4; kernel_height *= 2) {
13960     DeconvolutionOperatorTester()
13961       .batch_size(2)
13962       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13963       .padding_width(1)
13964       .kernel_size(kernel_height, 3)
13965       .groups(2)
13966       .group_input_channels(17)
13967       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13968       .iterations(3)
13969       .TestF32();
13970   }
13971 }
13972 
13973 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3xK) {
13974   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13975   for (size_t kernel_width = 1; kernel_width <= 4; kernel_width *= 2) {
13976     DeconvolutionOperatorTester()
13977       .batch_size(2)
13978       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13979       .padding_height(1)
13980       .kernel_size(3, kernel_width)
13981       .groups(2)
13982       .group_input_channels(17)
13983       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
13984       .iterations(3)
13985       .TestF32();
13986   }
13987 }
13988 
13989 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_height_padding) {
13990   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
13991   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
13992     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
13993       DeconvolutionOperatorTester()
13994         .batch_size(2)
13995         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
13996         .padding_width(1)
13997         .padding_top(padding_top)
13998         .padding_bottom(padding_bottom)
13999         .kernel_size(3, 3)
14000         .groups(2)
14001         .group_input_channels(15)
14002         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14003         .iterations(1)
14004         .TestF32();
14005     }
14006   }
14007 }
14008 
14009 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_width_padding) {
14010   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14011   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
14012     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
14013       DeconvolutionOperatorTester()
14014         .batch_size(2)
14015         .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14016         .padding_height(1)
14017         .padding_left(padding_left)
14018         .padding_right(padding_right)
14019         .kernel_size(3, 3)
14020         .groups(2)
14021         .group_input_channels(15)
14022         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14023         .iterations(1)
14024         .TestF32();
14025     }
14026   }
14027 }
14028 
14029 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_height_adjustment) {
14030   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14031   for (size_t adjustment_height = 1; adjustment_height <= 2; adjustment_height++) {
14032     DeconvolutionOperatorTester()
14033       .batch_size(2)
14034       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14035       .padding(1)
14036       .stride_height(adjustment_height + 1)
14037       .adjustment_height(adjustment_height)
14038       .kernel_size(3, 3)
14039       .groups(2)
14040       .group_input_channels(15)
14041       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14042       .iterations(1)
14043       .TestF32();
14044   }
14045 }
14046 
14047 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_width_adjustment) {
14048   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14049   for (size_t adjustment_width = 1; adjustment_width <= 2; adjustment_width++) {
14050     DeconvolutionOperatorTester()
14051       .batch_size(2)
14052       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14053       .padding(1)
14054       .stride_width(adjustment_width + 1)
14055       .adjustment_width(adjustment_width)
14056       .kernel_size(3, 3)
14057       .groups(2)
14058       .group_input_channels(15)
14059       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14060       .iterations(1)
14061       .TestF32();
14062   }
14063 }
14064 
14065 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_input_height) {
14066   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14067   for (size_t input_height = kUnstridedInputHeight - 2; input_height <= kUnstridedInputHeight + 2; input_height++) {
14068     DeconvolutionOperatorTester()
14069       .batch_size(2)
14070       .input_size(input_height, kUnstridedInputWidth)
14071       .padding(1)
14072       .kernel_size(3, 3)
14073       .groups(2)
14074       .group_input_channels(15)
14075       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14076       .iterations(1)
14077       .TestF32();
14078   }
14079 }
14080 
14081 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_input_width) {
14082   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14083   for (size_t input_width = kUnstridedInputWidth - 2; input_width <= kUnstridedInputWidth + 2; input_width++) {
14084     DeconvolutionOperatorTester()
14085       .batch_size(2)
14086       .input_size(kUnstridedInputHeight, input_width)
14087       .padding(1)
14088       .kernel_size(3, 3)
14089       .groups(2)
14090       .group_input_channels(15)
14091       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14092       .iterations(1)
14093       .TestF32();
14094   }
14095 }
14096 
14097 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_input_channels) {
14098   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14099   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
14100     DeconvolutionOperatorTester()
14101       .batch_size(2)
14102       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14103       .padding(1)
14104       .kernel_size(3, 3)
14105       .groups(2)
14106       .group_input_channels(input_channels)
14107       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14108       .iterations(1)
14109       .TestF32();
14110   }
14111 }
14112 
14113 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_varying_output_channels) {
14114   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14115   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
14116     DeconvolutionOperatorTester()
14117       .batch_size(2)
14118       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14119       .padding(1)
14120       .kernel_size(3, 3)
14121       .groups(2)
14122       .group_input_channels(23)
14123       .group_output_channels(output_channels)
14124       .iterations(1)
14125       .TestF32();
14126   }
14127 }
14128 
14129 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_height_dilation) {
14130   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14131   for (size_t dilation_height = 2; dilation_height <= 3; dilation_height++) {
14132     DeconvolutionOperatorTester()
14133       .batch_size(2)
14134       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14135       .padding(1)
14136       .kernel_size(3, 3)
14137       .dilation_height(dilation_height)
14138       .groups(2)
14139       .group_input_channels(23)
14140       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14141       .iterations(3)
14142       .TestF32();
14143   }
14144 }
14145 
14146 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_width_dilation) {
14147   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14148   for (size_t dilation_width = 2; dilation_width <= 3; dilation_width++) {
14149     DeconvolutionOperatorTester()
14150       .batch_size(2)
14151       .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14152       .padding(1)
14153       .kernel_size(3, 3)
14154       .dilation_width(dilation_width)
14155       .groups(2)
14156       .group_input_channels(23)
14157       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14158       .iterations(3)
14159       .TestF32();
14160   }
14161 }
14162 
14163 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_height_dilation_and_stride) {
14164   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14165   DeconvolutionOperatorTester()
14166     .batch_size(2)
14167     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14168     .padding(1)
14169     .kernel_size(3, 3)
14170     .dilation_height(3)
14171     .stride_height(2)
14172     .groups(2)
14173     .group_input_channels(23)
14174     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14175     .iterations(3)
14176     .TestF32();
14177 }
14178 
14179 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_width_dilation_and_stride) {
14180   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14181   DeconvolutionOperatorTester()
14182     .batch_size(2)
14183     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14184     .padding(1)
14185     .kernel_size(3, 3)
14186     .dilation_width(3)
14187     .stride_width(2)
14188     .groups(2)
14189     .group_input_channels(23)
14190     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14191     .iterations(3)
14192     .TestF32();
14193 }
14194 
14195 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_input_stride) {
14196   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14197   DeconvolutionOperatorTester()
14198     .batch_size(2)
14199     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14200     .padding(1)
14201     .kernel_size(3, 3)
14202     .groups(2)
14203     .group_input_channels(23)
14204     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14205     .input_pixel_stride(47)
14206     .iterations(3)
14207     .TestF32();
14208 }
14209 
14210 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_output_stride) {
14211   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14212   DeconvolutionOperatorTester()
14213     .batch_size(2)
14214     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14215     .padding(1)
14216     .kernel_size(3, 3)
14217     .groups(2)
14218     .group_input_channels(23)
14219     .group_output_channels(xnn_params.f32.gemm.nr + 3)
14220     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
14221     .iterations(3)
14222     .TestF32();
14223 }
14224 
14225 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_qmin) {
14226   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14227   DeconvolutionOperatorTester()
14228     .batch_size(2)
14229     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14230     .padding(1)
14231     .kernel_size(3, 3)
14232     .groups(2)
14233     .group_input_channels(23)
14234     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14235     .qmin(128)
14236     .iterations(3)
14237     .TestF32();
14238 }
14239 
14240 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_with_qmax) {
14241   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14242   DeconvolutionOperatorTester()
14243     .batch_size(2)
14244     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14245     .padding(1)
14246     .kernel_size(3, 3)
14247     .groups(2)
14248     .group_input_channels(23)
14249     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14250     .qmax(128)
14251     .iterations(3)
14252     .TestF32();
14253 }
14254 
14255 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3_without_bias) {
14256   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14257   DeconvolutionOperatorTester()
14258     .has_bias(false)
14259     .batch_size(2)
14260     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14261     .padding(1)
14262     .kernel_size(3, 3)
14263     .groups(2)
14264     .group_input_channels(23)
14265     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14266     .iterations(3)
14267     .TestF32();
14268 }
14269 
14270 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_grouped_3x3) {
14271   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14272   DeconvolutionOperatorTester()
14273     .batch_size(2)
14274     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14275     .padding(1)
14276     .kernel_size(3, 3)
14277     .groups(2)
14278     .group_input_channels(15)
14279     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14280     .use_weights_cache(true)
14281     .iterations(3)
14282     .TestF32();
14283 }
14284 
14285 /**************************** CONV path, setup ****************************/
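// The setup tests below re-run the same operator after changing the batch size or the input
// height/width (next_batch_size / next_input_height / next_input_width) to check that operator
// setup handles resizing.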
14286 
14287 TEST(DECONVOLUTION_NHWC_F32, 3x3_setup_changing_batch) {
14288   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14289   DeconvolutionOperatorTester()
14290     .batch_size(2)
14291     .next_batch_size(5)
14292     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14293     .kernel_height(3)
14294     .kernel_width(5)
14295     .groups(2)
14296     .group_input_channels(15)
14297     .group_output_channels(17)
14298     .TestSetupF32();
14299 }
14300 
14301 TEST(DECONVOLUTION_NHWC_F32, 3x3_setup_changing_height) {
14302   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14303   DeconvolutionOperatorTester()
14304     .batch_size(2)
14305     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14306     .next_input_height(kUnstridedInputHeight + 3)
14307     .kernel_height(3)
14308     .kernel_width(5)
14309     .groups(2)
14310     .group_input_channels(15)
14311     .group_output_channels(17)
14312     .TestSetupF32();
14313 }
14314 
14315 TEST(DECONVOLUTION_NHWC_F32, 3x3_setup_changing_width) {
14316   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14317   DeconvolutionOperatorTester()
14318     .batch_size(2)
14319     .input_size(kUnstridedInputHeight, kUnstridedInputWidth)
14320     .next_input_width(kUnstridedInputWidth + 3)
14321     .kernel_height(3)
14322     .kernel_width(5)
14323     .groups(2)
14324     .group_input_channels(15)
14325     .group_output_channels(17)
14326     .TestSetupF32();
14327 }
14328 
14329 /**************************** SUBCONV2D/IGEMM path ****************************/
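// Strided deconvolutions (stride > 1) are expected to take the SUBCONV2D/IGEMM path; the tests
// below sweep kernel shape, stride, padding, output adjustment, input size, channel counts,
// pixel strides, output clamping, bias, and the weights cache.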
14330 
14331 TEST(DECONVOLUTION_NHWC_F32, 3x3s2) {
14332   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14333   DeconvolutionOperatorTester()
14334     .input_size(kStridedInputHeight, kStridedInputWidth)
14335     .padding(1)
14336     .kernel_size(3, 3)
14337     .stride(2)
14338     .group_input_channels(15)
14339     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14340     .iterations(3)
14341     .TestF32();
14342 }
14343 
14344 TEST(DECONVOLUTION_NHWC_F32, Kx3s2) {
14345   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14346   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
14347     DeconvolutionOperatorTester()
14348       .input_size(kStridedInputHeight, kStridedInputWidth)
14349       .padding_width(1)
14350       .kernel_size(kernel_height, 3)
14351       .stride(2)
14352       .group_input_channels(17)
14353       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14354       .iterations(3)
14355       .TestF32();
14356   }
14357 }
14358 
14359 TEST(DECONVOLUTION_NHWC_F32, 3xKs2) {
14360   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14361   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
14362     DeconvolutionOperatorTester()
14363       .input_size(kStridedInputHeight, kStridedInputWidth)
14364       .padding_height(1)
14365       .kernel_size(3, kernel_width)
14366       .stride(2)
14367       .group_input_channels(17)
14368       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14369       .iterations(3)
14370       .TestF32();
14371   }
14372 }
14373 
14374 TEST(DECONVOLUTION_NHWC_F32, 3x3sSx1) {
14375   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14376   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
14377     DeconvolutionOperatorTester()
14378       .input_size(kStridedInputHeight, kStridedInputWidth)
14379       .padding(1)
14380       .padding_width(1)
14381       .kernel_size(3, 3)
14382       .stride_height(stride_height)
14383       .group_input_channels(17)
14384       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14385       .iterations(3)
14386       .TestF32();
14387   }
14388 }
14389 
14390 TEST(DECONVOLUTION_NHWC_F32, 3x3s1xS) {
14391   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14392   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
14393     DeconvolutionOperatorTester()
14394       .input_size(kStridedInputHeight, kStridedInputWidth)
14395       .padding(1)
14396       .padding_width(1)
14397       .kernel_size(3, 3)
14398       .stride_width(stride_width)
14399       .group_input_channels(17)
14400       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14401       .iterations(3)
14402       .TestF32();
14403   }
14404 }
14405 
14406 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_height_padding) {
14407   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14408   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
14409     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
14410       DeconvolutionOperatorTester()
14411         .input_size(kStridedInputHeight, kStridedInputWidth)
14412         .padding_width(1)
14413         .padding_top(padding_top)
14414         .padding_bottom(padding_bottom)
14415         .kernel_size(3, 3)
14416         .stride(2)
14417         .group_input_channels(15)
14418         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14419         .iterations(1)
14420         .TestF32();
14421     }
14422   }
14423 }
14424 
14425 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_width_padding) {
14426   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14427   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
14428     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
14429       DeconvolutionOperatorTester()
14430         .input_size(kStridedInputHeight, kStridedInputWidth)
14431         .padding_height(1)
14432         .padding_left(padding_left)
14433         .padding_right(padding_right)
14434         .kernel_size(3, 3)
14435         .stride(2)
14436         .group_input_channels(15)
14437         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14438         .iterations(1)
14439         .TestF32();
14440     }
14441   }
14442 }
14443 
14444 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_height_adjustment) {
14445   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14446   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
14447     DeconvolutionOperatorTester()
14448       .input_size(kStridedInputHeight, kStridedInputWidth)
14449       .padding(1)
14450       .adjustment_height(adjustment_height)
14451       .kernel_size(3, 3)
14452       .stride(2)
14453       .group_input_channels(15)
14454       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14455       .iterations(1)
14456       .TestF32();
14457   }
14458 }
14459 
14460 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_width_adjustment) {
14461   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14462   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
14463     DeconvolutionOperatorTester()
14464       .input_size(kStridedInputHeight, kStridedInputWidth)
14465       .padding(1)
14466       .adjustment_width(adjustment_width)
14467       .kernel_size(3, 3)
14468       .stride(2)
14469       .group_input_channels(15)
14470       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14471       .iterations(1)
14472       .TestF32();
14473   }
14474 }
14475 
14476 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_input_height) {
14477   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14478   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
14479     DeconvolutionOperatorTester()
14480       .input_size(input_height, kStridedInputWidth)
14481       .padding(1)
14482       .kernel_size(3, 3)
14483       .stride(2)
14484       .group_input_channels(15)
14485       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14486       .iterations(1)
14487       .TestF32();
14488   }
14489 }
14490 
14491 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_input_width) {
14492   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14493   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
14494     DeconvolutionOperatorTester()
14495       .input_size(kStridedInputHeight, input_width)
14496       .padding(1)
14497       .kernel_size(3, 3)
14498       .stride(2)
14499       .group_input_channels(15)
14500       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14501       .iterations(1)
14502       .TestF32();
14503   }
14504 }
14505 
14506 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_input_channels) {
14507   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14508   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
14509     DeconvolutionOperatorTester()
14510       .input_size(kStridedInputHeight, kStridedInputWidth)
14511       .padding(1)
14512       .kernel_size(3, 3)
14513       .stride(2)
14514       .group_input_channels(input_channels)
14515       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14516       .iterations(1)
14517       .TestF32();
14518   }
14519 }
14520 
14521 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_varying_output_channels) {
14522   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14523   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
14524     DeconvolutionOperatorTester()
14525       .input_size(kStridedInputHeight, kStridedInputWidth)
14526       .padding(1)
14527       .kernel_size(3, 3)
14528       .stride(2)
14529       .group_input_channels(23)
14530       .group_output_channels(output_channels)
14531       .iterations(1)
14532       .TestF32();
14533   }
14534 }
14535 
14536 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_input_stride) {
14537   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14538   DeconvolutionOperatorTester()
14539     .input_size(kStridedInputHeight, kStridedInputWidth)
14540     .padding(1)
14541     .kernel_size(3, 3)
14542     .stride(2)
14543     .group_input_channels(23)
14544     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14545     .input_pixel_stride(28)
14546     .iterations(3)
14547     .TestF32();
14548 }
14549 
14550 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_output_stride) {
14551   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14552   DeconvolutionOperatorTester()
14553     .input_size(kStridedInputHeight, kStridedInputWidth)
14554     .padding(1)
14555     .kernel_size(3, 3)
14556     .stride(2)
14557     .group_input_channels(23)
14558     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14559     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
14560     .iterations(3)
14561     .TestF32();
14562 }
14563 
14564 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_qmin) {
14565   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14566   DeconvolutionOperatorTester()
14567     .input_size(kStridedInputHeight, kStridedInputWidth)
14568     .padding(1)
14569     .kernel_size(3, 3)
14570     .stride(2)
14571     .group_input_channels(23)
14572     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14573     .qmin(128)
14574     .iterations(3)
14575     .TestF32();
14576 }
14577 
14578 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_with_qmax) {
14579   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14580   DeconvolutionOperatorTester()
14581     .input_size(kStridedInputHeight, kStridedInputWidth)
14582     .padding(1)
14583     .kernel_size(3, 3)
14584     .stride(2)
14585     .group_input_channels(23)
14586     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14587     .qmax(128)
14588     .iterations(3)
14589     .TestF32();
14590 }
14591 
14592 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_without_bias) {
14593   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14594   DeconvolutionOperatorTester()
14595     .has_bias(false)
14596     .input_size(kStridedInputHeight, kStridedInputWidth)
14597     .padding(1)
14598     .kernel_size(3, 3)
14599     .stride(2)
14600     .group_input_channels(23)
14601     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14602     .iterations(3)
14603     .TestF32();
14604 }
14605 
14606 TEST(DECONVOLUTION_NHWC_F32, weights_cache_3x3s2) {
14607   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14608   DeconvolutionOperatorTester()
14609     .input_size(kStridedInputHeight, kStridedInputWidth)
14610     .padding(1)
14611     .kernel_size(3, 3)
14612     .stride(2)
14613     .group_input_channels(15)
14614     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14615     .use_weights_cache(true)
14616     .iterations(3)
14617     .TestF32();
14618 }
14619 
14620 TEST(DECONVOLUTION_NHWC_F32, stress_weights_cache_5x5s4) {
14621   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14622   DeconvolutionOperatorTester()
14623     .input_size(kStridedInputHeight, kStridedInputWidth)
14624     .padding(1)
14625     .kernel_size(5, 5)
14626     .stride(4)
14627     .group_input_channels(15)
14628     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14629     .iterations(60)  // Higher number of iterations to write more weights.
14630     .StressWeightsCacheTestF32();
14631 }
14632 
14633 /**************************** SUBCONV2D/IGEMM path, grouped ****************************/
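// Same strided coverage as above, repeated with groups(2).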
14634 
14635 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2) {
14636   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14637   DeconvolutionOperatorTester()
14638     .input_size(kStridedInputHeight, kStridedInputWidth)
14639     .padding(1)
14640     .kernel_size(3, 3)
14641     .stride(2)
14642     .groups(2)
14643     .group_input_channels(17)
14644     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14645     .iterations(3)
14646     .TestF32();
14647 }
14648 
14649 TEST(DECONVOLUTION_NHWC_F32, grouped_Kx3s2) {
14650   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14651   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
14652     DeconvolutionOperatorTester()
14653       .input_size(kStridedInputHeight, kStridedInputWidth)
14654       .padding_width(1)
14655       .kernel_size(kernel_height, 3)
14656       .stride(2)
14657       .groups(2)
14658       .group_input_channels(17)
14659       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14660       .iterations(3)
14661       .TestF32();
14662   }
14663 }
14664 
14665 TEST(DECONVOLUTION_NHWC_F32, grouped_3xKs2) {
14666   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14667   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
14668     DeconvolutionOperatorTester()
14669       .input_size(kStridedInputHeight, kStridedInputWidth)
14670       .padding_height(1)
14671       .kernel_size(3, kernel_width)
14672       .stride(2)
14673       .groups(2)
14674       .group_input_channels(17)
14675       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14676       .iterations(3)
14677       .TestF32();
14678   }
14679 }
14680 
14681 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3sSx1) {
14682   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14683   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
14684     DeconvolutionOperatorTester()
14685       .input_size(kStridedInputHeight, kStridedInputWidth)
14686       .padding(1)
14687       .padding_width(1)
14688       .kernel_size(3, 3)
14689       .stride_height(stride_height)
14690       .groups(2)
14691       .group_input_channels(17)
14692       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14693       .iterations(3)
14694       .TestF32();
14695   }
14696 }
14697 
14698 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s1xS) {
14699   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14700   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
14701     DeconvolutionOperatorTester()
14702       .input_size(kStridedInputHeight, kStridedInputWidth)
14703       .padding(1)
14704       .padding_width(1)
14705       .kernel_size(3, 3)
14706       .stride_width(stride_width)
14707       .groups(2)
14708       .group_input_channels(17)
14709       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14710       .iterations(3)
14711       .TestF32();
14712   }
14713 }
14714 
14715 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_height_padding) {
14716   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14717   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
14718     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
14719       DeconvolutionOperatorTester()
14720         .input_size(kStridedInputHeight, kStridedInputWidth)
14721         .padding_width(1)
14722         .padding_top(padding_top)
14723         .padding_bottom(padding_bottom)
14724         .kernel_size(3, 3)
14725         .stride(2)
14726         .groups(2)
14727         .group_input_channels(17)
14728         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14729         .iterations(1)
14730         .TestF32();
14731     }
14732   }
14733 }
14734 
14735 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_width_padding) {
14736   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14737   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
14738     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
14739       DeconvolutionOperatorTester()
14740         .input_size(kStridedInputHeight, kStridedInputWidth)
14741         .padding_height(1)
14742         .padding_left(padding_left)
14743         .padding_right(padding_right)
14744         .kernel_size(3, 3)
14745         .stride(2)
14746         .groups(2)
14747         .group_input_channels(17)
14748         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14749         .iterations(1)
14750         .TestF32();
14751     }
14752   }
14753 }
14754 
14755 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_height_adjustment) {
14756   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14757   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
14758     DeconvolutionOperatorTester()
14759       .input_size(kStridedInputHeight, kStridedInputWidth)
14760       .padding(1)
14761       .adjustment_height(adjustment_height)
14762       .kernel_size(3, 3)
14763       .stride(2)
14764       .groups(2)
14765       .group_input_channels(17)
14766       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14767       .iterations(1)
14768       .TestF32();
14769   }
14770 }
14771 
14772 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_width_adjustment) {
14773   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14774   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
14775     DeconvolutionOperatorTester()
14776       .input_size(kStridedInputHeight, kStridedInputWidth)
14777       .padding(1)
14778       .adjustment_width(adjustment_width)
14779       .kernel_size(3, 3)
14780       .stride(2)
14781       .groups(2)
14782       .group_input_channels(17)
14783       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14784       .iterations(1)
14785       .TestF32();
14786   }
14787 }
14788 
14789 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_input_height) {
14790   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14791   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
14792     DeconvolutionOperatorTester()
14793       .input_size(input_height, kStridedInputWidth)
14794       .padding(1)
14795       .kernel_size(3, 3)
14796       .stride(2)
14797       .groups(2)
14798       .group_input_channels(17)
14799       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14800       .iterations(1)
14801       .TestF32();
14802   }
14803 }
14804 
14805 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_input_width) {
14806   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14807   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
14808     DeconvolutionOperatorTester()
14809       .input_size(kStridedInputHeight, input_width)
14810       .padding(1)
14811       .kernel_size(3, 3)
14812       .stride(2)
14813       .groups(2)
14814       .group_input_channels(17)
14815       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14816       .iterations(1)
14817       .TestF32();
14818   }
14819 }
14820 
14821 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_input_channels) {
14822   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14823   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
14824     DeconvolutionOperatorTester()
14825       .input_size(kStridedInputHeight, kStridedInputWidth)
14826       .padding(1)
14827       .kernel_size(3, 3)
14828       .stride(2)
14829       .groups(2)
14830       .group_input_channels(input_channels)
14831       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14832       .iterations(1)
14833       .TestF32();
14834   }
14835 }
14836 
14837 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_varying_output_channels) {
14838   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14839   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
14840     DeconvolutionOperatorTester()
14841       .input_size(kStridedInputHeight, kStridedInputWidth)
14842       .padding(1)
14843       .kernel_size(3, 3)
14844       .stride(2)
14845       .groups(2)
14846       .group_input_channels(17)
14847       .group_output_channels(output_channels)
14848       .iterations(1)
14849       .TestF32();
14850   }
14851 }
14852 
14853 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_input_stride) {
14854   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14855   DeconvolutionOperatorTester()
14856     .input_size(kStridedInputHeight, kStridedInputWidth)
14857     .padding(1)
14858     .kernel_size(3, 3)
14859     .stride(2)
14860     .groups(2)
14861     .group_input_channels(17)
14862     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14863     .input_pixel_stride(37)
14864     .iterations(3)
14865     .TestF32();
14866 }
14867 
14868 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_output_stride) {
14869   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14870   DeconvolutionOperatorTester()
14871     .input_size(kStridedInputHeight, kStridedInputWidth)
14872     .padding(1)
14873     .kernel_size(3, 3)
14874     .stride(2)
14875     .groups(2)
14876     .group_input_channels(17)
14877     .group_output_channels(xnn_params.f32.gemm.nr + 3)
14878     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
14879     .iterations(3)
14880     .TestF32();
14881 }
14882 
14883 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_qmin) {
14884   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14885   DeconvolutionOperatorTester()
14886     .input_size(kStridedInputHeight, kStridedInputWidth)
14887     .padding(1)
14888     .kernel_size(3, 3)
14889     .stride(2)
14890     .groups(2)
14891     .group_input_channels(17)
14892     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14893     .qmin(128)
14894     .iterations(3)
14895     .TestF32();
14896 }
14897 
14898 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_with_qmax) {
14899   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14900   DeconvolutionOperatorTester()
14901     .input_size(kStridedInputHeight, kStridedInputWidth)
14902     .padding(1)
14903     .kernel_size(3, 3)
14904     .stride(2)
14905     .groups(2)
14906     .group_input_channels(17)
14907     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14908     .qmax(128)
14909     .iterations(3)
14910     .TestF32();
14911 }
14912 
14913 TEST(DECONVOLUTION_NHWC_F32, grouped_3x3s2_without_bias) {
14914   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14915   DeconvolutionOperatorTester()
14916     .has_bias(false)
14917     .input_size(kStridedInputHeight, kStridedInputWidth)
14918     .padding(1)
14919     .kernel_size(3, 3)
14920     .stride(2)
14921     .groups(2)
14922     .group_input_channels(17)
14923     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14924     .iterations(3)
14925     .TestF32();
14926 }
14927 
14928 TEST(DECONVOLUTION_NHWC_F32, weights_cache_grouped_3x3s2) {
14929   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14930   DeconvolutionOperatorTester()
14931     .input_size(kStridedInputHeight, kStridedInputWidth)
14932     .padding(1)
14933     .kernel_size(3, 3)
14934     .stride(2)
14935     .groups(2)
14936     .group_input_channels(17)
14937     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14938     .use_weights_cache(true)
14939     .iterations(3)
14940     .TestF32();
14941 }
14942 
14943 /**************************** SUBCONV2D/IGEMM path, batched ****************************/
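// Same strided coverage as above, repeated with batch_size(2).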
14944 
14945 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2) {
14946   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14947   DeconvolutionOperatorTester()
14948     .batch_size(2)
14949     .input_size(kStridedInputHeight, kStridedInputWidth)
14950     .padding(1)
14951     .kernel_size(3, 3)
14952     .stride(2)
14953     .group_input_channels(15)
14954     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14955     .iterations(3)
14956     .TestF32();
14957 }
14958 
14959 TEST(DECONVOLUTION_NHWC_F32, batched_Kx3s2) {
14960   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14961   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
14962     DeconvolutionOperatorTester()
14963       .batch_size(2)
14964       .input_size(kStridedInputHeight, kStridedInputWidth)
14965       .padding_width(1)
14966       .kernel_size(kernel_height, 3)
14967       .stride(2)
14968       .group_input_channels(17)
14969       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14970       .iterations(3)
14971       .TestF32();
14972   }
14973 }
14974 
14975 TEST(DECONVOLUTION_NHWC_F32, batched_3xKs2) {
14976   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14977   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
14978     DeconvolutionOperatorTester()
14979       .batch_size(2)
14980       .input_size(kStridedInputHeight, kStridedInputWidth)
14981       .padding_height(1)
14982       .kernel_size(3, kernel_width)
14983       .stride(2)
14984       .group_input_channels(17)
14985       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
14986       .iterations(3)
14987       .TestF32();
14988   }
14989 }
14990 
14991 TEST(DECONVOLUTION_NHWC_F32, batched_3x3sSx1) {
14992   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
14993   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
14994     DeconvolutionOperatorTester()
14995       .batch_size(2)
14996       .input_size(kStridedInputHeight, kStridedInputWidth)
14997       .padding(1)
14998       .padding_width(1)
14999       .kernel_size(3, 3)
15000       .stride_height(stride_height)
15001       .group_input_channels(17)
15002       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15003       .iterations(3)
15004       .TestF32();
15005   }
15006 }
15007 
15008 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s1xS) {
15009   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15010   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
15011     DeconvolutionOperatorTester()
15012       .batch_size(2)
15013       .input_size(kStridedInputHeight, kStridedInputWidth)
15014       .padding(1)
15015       .padding_width(1)
15016       .kernel_size(3, 3)
15017       .stride_width(stride_width)
15018       .group_input_channels(17)
15019       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15020       .iterations(3)
15021       .TestF32();
15022   }
15023 }
15024 
15025 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_height_padding) {
15026   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15027   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
15028     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
15029       DeconvolutionOperatorTester()
15030         .batch_size(2)
15031         .input_size(kStridedInputHeight, kStridedInputWidth)
15032         .padding_width(1)
15033         .padding_top(padding_top)
15034         .padding_bottom(padding_bottom)
15035         .kernel_size(3, 3)
15036         .stride(2)
15037         .group_input_channels(15)
15038         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15039         .iterations(1)
15040         .TestF32();
15041     }
15042   }
15043 }
15044 
15045 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_width_padding) {
15046   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15047   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
15048     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
15049       DeconvolutionOperatorTester()
15050         .batch_size(2)
15051         .input_size(kStridedInputHeight, kStridedInputWidth)
15052         .padding_height(1)
15053         .padding_left(padding_left)
15054         .padding_right(padding_right)
15055         .kernel_size(3, 3)
15056         .stride(2)
15057         .group_input_channels(15)
15058         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15059         .iterations(1)
15060         .TestF32();
15061     }
15062   }
15063 }
15064 
15065 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_height_adjustment) {
15066   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15067   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
15068     DeconvolutionOperatorTester()
15069       .batch_size(2)
15070       .input_size(kStridedInputHeight, kStridedInputWidth)
15071       .padding(1)
15072       .adjustment_height(adjustment_height)
15073       .kernel_size(3, 3)
15074       .stride(2)
15075       .group_input_channels(15)
15076       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15077       .iterations(1)
15078       .TestF32();
15079   }
15080 }
15081 
15082 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_width_adjustment) {
15083   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15084   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
15085     DeconvolutionOperatorTester()
15086       .batch_size(2)
15087       .input_size(kStridedInputHeight, kStridedInputWidth)
15088       .padding(1)
15089       .adjustment_width(adjustment_width)
15090       .kernel_size(3, 3)
15091       .stride(2)
15092       .group_input_channels(15)
15093       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15094       .iterations(1)
15095       .TestF32();
15096   }
15097 }
15098 
15099 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_input_height) {
15100   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15101   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15102     DeconvolutionOperatorTester()
15103       .batch_size(2)
15104       .input_size(input_height, kStridedInputWidth)
15105       .padding(1)
15106       .kernel_size(3, 3)
15107       .stride(2)
15108       .group_input_channels(15)
15109       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15110       .iterations(1)
15111       .TestF32();
15112   }
15113 }
15114 
15115 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_input_width) {
15116   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15117   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15118     DeconvolutionOperatorTester()
15119       .batch_size(2)
15120       .input_size(kStridedInputHeight, input_width)
15121       .padding(1)
15122       .kernel_size(3, 3)
15123       .stride(2)
15124       .group_input_channels(15)
15125       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15126       .iterations(1)
15127       .TestF32();
15128   }
15129 }
15130 
15131 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_input_channels) {
15132   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15133   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
15134     DeconvolutionOperatorTester()
15135       .batch_size(2)
15136       .input_size(kStridedInputHeight, kStridedInputWidth)
15137       .padding(1)
15138       .kernel_size(3, 3)
15139       .stride(2)
15140       .group_input_channels(input_channels)
15141       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15142       .iterations(1)
15143       .TestF32();
15144   }
15145 }
15146 
15147 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_varying_output_channels) {
15148   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15149   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15150     DeconvolutionOperatorTester()
15151       .batch_size(2)
15152       .input_size(kStridedInputHeight, kStridedInputWidth)
15153       .padding(1)
15154       .kernel_size(3, 3)
15155       .stride(2)
15156       .group_input_channels(23)
15157       .group_output_channels(output_channels)
15158       .iterations(1)
15159       .TestF32();
15160   }
15161 }
15162 
15163 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_input_stride) {
15164   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15165   DeconvolutionOperatorTester()
15166     .batch_size(2)
15167     .input_size(kStridedInputHeight, kStridedInputWidth)
15168     .padding(1)
15169     .kernel_size(3, 3)
15170     .stride(2)
15171     .group_input_channels(23)
15172     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15173     .input_pixel_stride(28)
15174     .iterations(3)
15175     .TestF32();
15176 }
15177 
15178 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_output_stride) {
15179   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15180   DeconvolutionOperatorTester()
15181     .batch_size(2)
15182     .input_size(kStridedInputHeight, kStridedInputWidth)
15183     .padding(1)
15184     .kernel_size(3, 3)
15185     .stride(2)
15186     .group_input_channels(23)
15187     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15188     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15189     .iterations(3)
15190     .TestF32();
15191 }
15192 
15193 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_qmin) {
15194   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15195   DeconvolutionOperatorTester()
15196     .batch_size(2)
15197     .input_size(kStridedInputHeight, kStridedInputWidth)
15198     .padding(1)
15199     .kernel_size(3, 3)
15200     .stride(2)
15201     .group_input_channels(23)
15202     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15203     .qmin(128)
15204     .iterations(3)
15205     .TestF32();
15206 }
15207 
15208 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_with_qmax) {
15209   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15210   DeconvolutionOperatorTester()
15211     .batch_size(2)
15212     .input_size(kStridedInputHeight, kStridedInputWidth)
15213     .padding(1)
15214     .kernel_size(3, 3)
15215     .stride(2)
15216     .group_input_channels(23)
15217     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15218     .qmax(128)
15219     .iterations(3)
15220     .TestF32();
15221 }
15222 
15223 TEST(DECONVOLUTION_NHWC_F32, batched_3x3s2_without_bias) {
15224   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15225   DeconvolutionOperatorTester()
15226     .has_bias(false)
15227     .batch_size(2)
15228     .input_size(kStridedInputHeight, kStridedInputWidth)
15229     .padding(1)
15230     .kernel_size(3, 3)
15231     .stride(2)
15232     .group_input_channels(23)
15233     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15234     .iterations(3)
15235     .TestF32();
15236 }
15237 
15238 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_3x3s2) {
15239   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15240   DeconvolutionOperatorTester()
15241     .batch_size(2)
15242     .input_size(kStridedInputHeight, kStridedInputWidth)
15243     .padding(1)
15244     .kernel_size(3, 3)
15245     .stride(2)
15246     .group_input_channels(15)
15247     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15248     .use_weights_cache(true)
15249     .iterations(3)
15250     .TestF32();
15251 }
15252 
15253 /**************************** SUBCONV2D/IGEMM path, grouped, batched ****************************/
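// Note (a rough orientation, not part of the original test plan): with a 3x3
// kernel and stride 2 the per-subkernel output taps overlap, so these cases are
// expected to take the subconvolution IGEMM path named above; groups(2) splits
// the channels into two independent convolutions and batch_size(2) adds an
// outer batch dimension. By the usual transposed-convolution arithmetic
// (dilation 1), the expected output height for the common parameters below is
//   (kStridedInputHeight - 1) * 2 + 3 - (1 + 1) = (6 - 1) * 2 + 3 - 2 = 11.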
15254 
15255 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2) {
15256   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15257   DeconvolutionOperatorTester()
15258     .batch_size(2)
15259     .input_size(kStridedInputHeight, kStridedInputWidth)
15260     .padding(1)
15261     .kernel_size(3, 3)
15262     .stride(2)
15263     .groups(2)
15264     .group_input_channels(17)
15265     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15266     .iterations(3)
15267     .TestF32();
15268 }
15269 
15270 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_Kx3s2) {
15271   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15272   for (size_t kernel_height = 2; kernel_height <= 5; kernel_height++) {
15273     DeconvolutionOperatorTester()
15274       .batch_size(2)
15275       .input_size(kStridedInputHeight, kStridedInputWidth)
15276       .padding_width(1)
15277       .kernel_size(kernel_height, 3)
15278       .stride(2)
15279       .groups(2)
15280       .group_input_channels(17)
15281       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15282       .iterations(3)
15283       .TestF32();
15284   }
15285 }
15286 
15287 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3xKs2) {
15288   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15289   for (size_t kernel_width = 2; kernel_width <= 5; kernel_width++) {
15290     DeconvolutionOperatorTester()
15291       .batch_size(2)
15292       .input_size(kStridedInputHeight, kStridedInputWidth)
15293       .padding_height(1)
15294       .kernel_size(3, kernel_width)
15295       .stride(2)
15296       .groups(2)
15297       .group_input_channels(17)
15298       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15299       .iterations(3)
15300       .TestF32();
15301   }
15302 }
15303 
15304 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3sSx1) {
15305   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15306   for (size_t stride_height = 2; stride_height <= 3; stride_height++) {
15307     DeconvolutionOperatorTester()
15308       .batch_size(2)
15309       .input_size(kStridedInputHeight, kStridedInputWidth)
15310       .padding(1)
15311       .padding_width(1)
15312       .kernel_size(3, 3)
15313       .stride_height(stride_height)
15314       .groups(2)
15315       .group_input_channels(17)
15316       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15317       .iterations(3)
15318       .TestF32();
15319   }
15320 }
15321 
15322 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s1xS) {
15323   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15324   for (size_t stride_width = 2; stride_width <= 3; stride_width++) {
15325     DeconvolutionOperatorTester()
15326       .batch_size(2)
15327       .input_size(kStridedInputHeight, kStridedInputWidth)
15328       .padding(1)
15329       .padding_width(1)
15330       .kernel_size(3, 3)
15331       .stride_width(stride_width)
15332       .groups(2)
15333       .group_input_channels(17)
15334       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15335       .iterations(3)
15336       .TestF32();
15337   }
15338 }
15339 
15340 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_height_padding) {
15341   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15342   for (size_t padding_top = 0; padding_top <= 2; padding_top++) {
15343     for (size_t padding_bottom = 0; padding_bottom <= 2; padding_bottom++) {
15344       DeconvolutionOperatorTester()
15345         .batch_size(2)
15346         .input_size(kStridedInputHeight, kStridedInputWidth)
15347         .padding_width(1)
15348         .padding_top(padding_top)
15349         .padding_bottom(padding_bottom)
15350         .kernel_size(3, 3)
15351         .stride(2)
15352         .groups(2)
15353         .group_input_channels(17)
15354         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15355         .iterations(1)
15356         .TestF32();
15357     }
15358   }
15359 }
15360 
15361 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_width_padding) {
15362   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15363   for (size_t padding_left = 0; padding_left <= 2; padding_left++) {
15364     for (size_t padding_right = 0; padding_right <= 2; padding_right++) {
15365       DeconvolutionOperatorTester()
15366         .batch_size(2)
15367         .input_size(kStridedInputHeight, kStridedInputWidth)
15368         .padding_height(1)
15369         .padding_left(padding_left)
15370         .padding_right(padding_right)
15371         .kernel_size(3, 3)
15372         .stride(2)
15373         .groups(2)
15374         .group_input_channels(17)
15375         .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15376         .iterations(1)
15377         .TestF32();
15378     }
15379   }
15380 }
15381 
15382 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_height_adjustment) {
15383   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15384   for (size_t adjustment_height = 0; adjustment_height <= 1; adjustment_height++) {
15385     DeconvolutionOperatorTester()
15386       .batch_size(2)
15387       .input_size(kStridedInputHeight, kStridedInputWidth)
15388       .padding(1)
15389       .adjustment_height(adjustment_height)
15390       .kernel_size(3, 3)
15391       .stride(2)
15392       .groups(2)
15393       .group_input_channels(17)
15394       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15395       .iterations(1)
15396       .TestF32();
15397   }
15398 }
15399 
15400 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_width_adjustment) {
15401   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15402   for (size_t adjustment_width = 0; adjustment_width <= 1; adjustment_width++) {
15403     DeconvolutionOperatorTester()
15404       .batch_size(2)
15405       .input_size(kStridedInputHeight, kStridedInputWidth)
15406       .padding(1)
15407       .adjustment_width(adjustment_width)
15408       .kernel_size(3, 3)
15409       .stride(2)
15410       .groups(2)
15411       .group_input_channels(17)
15412       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15413       .iterations(1)
15414       .TestF32();
15415   }
15416 }
15417 
15418 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_input_height) {
15419   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15420   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15421     DeconvolutionOperatorTester()
15422       .batch_size(2)
15423       .input_size(input_height, kStridedInputWidth)
15424       .padding(1)
15425       .kernel_size(3, 3)
15426       .stride(2)
15427       .groups(2)
15428       .group_input_channels(17)
15429       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15430       .iterations(1)
15431       .TestF32();
15432   }
15433 }
15434 
15435 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_input_width) {
15436   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15437   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15438     DeconvolutionOperatorTester()
15439       .batch_size(2)
15440       .input_size(kStridedInputHeight, input_width)
15441       .padding(1)
15442       .kernel_size(3, 3)
15443       .stride(2)
15444       .groups(2)
15445       .group_input_channels(17)
15446       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15447       .iterations(1)
15448       .TestF32();
15449   }
15450 }
15451 
15452 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_input_channels) {
15453   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15454   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
15455     DeconvolutionOperatorTester()
15456       .batch_size(2)
15457       .input_size(kStridedInputHeight, kStridedInputWidth)
15458       .padding(1)
15459       .kernel_size(3, 3)
15460       .stride(2)
15461       .groups(2)
15462       .group_input_channels(input_channels)
15463       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15464       .iterations(1)
15465       .TestF32();
15466   }
15467 }
15468 
15469 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_varying_output_channels) {
15470   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15471   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15472     DeconvolutionOperatorTester()
15473       .batch_size(2)
15474       .input_size(kStridedInputHeight, kStridedInputWidth)
15475       .padding(1)
15476       .kernel_size(3, 3)
15477       .stride(2)
15478       .groups(2)
15479       .group_input_channels(17)
15480       .group_output_channels(output_channels)
15481       .iterations(1)
15482       .TestF32();
15483   }
15484 }
15485 
15486 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_input_stride) {
15487   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15488   DeconvolutionOperatorTester()
15489     .batch_size(2)
15490     .input_size(kStridedInputHeight, kStridedInputWidth)
15491     .padding(1)
15492     .kernel_size(3, 3)
15493     .stride(2)
15494     .groups(2)
15495     .group_input_channels(17)
15496     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15497     .input_pixel_stride(37)
15498     .iterations(3)
15499     .TestF32();
15500 }
15501 
15502 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_output_stride) {
15503   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15504   DeconvolutionOperatorTester()
15505     .batch_size(2)
15506     .input_size(kStridedInputHeight, kStridedInputWidth)
15507     .padding(1)
15508     .kernel_size(3, 3)
15509     .stride(2)
15510     .groups(2)
15511     .group_input_channels(17)
15512     .group_output_channels(xnn_params.f32.gemm.nr + 3)
15513     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15514     .iterations(3)
15515     .TestF32();
15516 }
15517 
15518 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_qmin) {
15519   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15520   DeconvolutionOperatorTester()
15521     .batch_size(2)
15522     .input_size(kStridedInputHeight, kStridedInputWidth)
15523     .padding(1)
15524     .kernel_size(3, 3)
15525     .stride(2)
15526     .groups(2)
15527     .group_input_channels(17)
15528     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15529     .qmin(128)
15530     .iterations(3)
15531     .TestF32();
15532 }
15533 
15534 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_with_qmax) {
15535   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15536   DeconvolutionOperatorTester()
15537     .batch_size(2)
15538     .input_size(kStridedInputHeight, kStridedInputWidth)
15539     .padding(1)
15540     .kernel_size(3, 3)
15541     .stride(2)
15542     .groups(2)
15543     .group_input_channels(17)
15544     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15545     .qmax(128)
15546     .iterations(3)
15547     .TestF32();
15548 }
15549 
15550 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_3x3s2_without_bias) {
15551   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15552   DeconvolutionOperatorTester()
15553     .has_bias(false)
15554     .batch_size(2)
15555     .input_size(kStridedInputHeight, kStridedInputWidth)
15556     .padding(1)
15557     .kernel_size(3, 3)
15558     .stride(2)
15559     .groups(2)
15560     .group_input_channels(17)
15561     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15562     .iterations(3)
15563     .TestF32();
15564 }
15565 
15566 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_grouped_3x3s2) {
15567   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15568   DeconvolutionOperatorTester()
15569     .batch_size(2)
15570     .input_size(kStridedInputHeight, kStridedInputWidth)
15571     .padding(1)
15572     .kernel_size(3, 3)
15573     .stride(2)
15574     .groups(2)
15575     .group_input_channels(17)
15576     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15577     .use_weights_cache(true)
15578     .iterations(3)
15579     .TestF32();
15580 }
15581 
15582 /**************************** SUBCONV2D/IGEMM path, setup ****************************/
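// Note: the setup tests below construct the operator once and then, through
// TestSetupF32(), run it a second time after changing the batch size or the
// spatial dimensions (next_batch_size / next_input_height / next_input_width).
// As the tester interface suggests, the intent is to check that re-setup with
// new shapes still produces correct results without recreating the operator;
// the actual verification lives in deconvolution-operator-tester.h.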
15583 
15584 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_setup_changing_batch) {
15585   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15586   DeconvolutionOperatorTester()
15587     .batch_size(2)
15588     .next_batch_size(5)
15589     .input_size(kStridedInputHeight, kStridedInputWidth)
15590     .kernel_size(3, 3)
15591     .stride(2)
15592     .groups(2)
15593     .group_input_channels(15)
15594     .group_output_channels(17)
15595     .TestSetupF32();
15596 }
15597 
15598 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_setup_changing_height) {
15599   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15600   DeconvolutionOperatorTester()
15601     .batch_size(2)
15602     .input_size(kStridedInputHeight, kStridedInputWidth)
15603     .next_input_height(kStridedInputHeight + 3)
15604     .kernel_size(3, 3)
15605     .stride(2)
15606     .groups(2)
15607     .group_input_channels(15)
15608     .group_output_channels(17)
15609     .TestSetupF32();
15610 }
15611 
15612 TEST(DECONVOLUTION_NHWC_F32, 3x3s2_setup_changing_width) {
15613   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15614   DeconvolutionOperatorTester()
15615     .batch_size(2)
15616     .input_size(kStridedInputHeight, kStridedInputWidth)
15617     .next_input_width(kStridedInputWidth + 3)
15618     .kernel_size(3, 3)
15619     .stride(2)
15620     .groups(2)
15621     .group_input_channels(15)
15622     .group_output_channels(17)
15623     .TestSetupF32();
15624 }
15625 
15626 /**************************** SUBCONV2D/GEMM path ****************************/
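// Note: when the kernel size equals the stride and no padding is used, every
// input pixel maps onto a disjoint block of output pixels, so each of the
// stride_h * stride_w subkernels can presumably be computed as a plain GEMM
// (no indirection buffer needed); the 2x2s2 tests below target that case.
// With kStridedInputHeight = 6 and kStridedInputWidth = 5, the usual
// transposed-convolution arithmetic gives an expected output of
//   (6 - 1) * 2 + 2 = 12 rows  and  (5 - 1) * 2 + 2 = 10 columns.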
15627 
15628 TEST(DECONVOLUTION_NHWC_F32, 2x2s2) {
15629   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15630   DeconvolutionOperatorTester()
15631     .input_size(kStridedInputHeight, kStridedInputWidth)
15632     .kernel_size(2, 2)
15633     .stride(2)
15634     .group_input_channels(15)
15635     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15636     .iterations(3)
15637     .TestF32();
15638 }
15639 
15640 TEST(DECONVOLUTION_NHWC_F32, Kx2sKx2) {
15641   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15642   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
15643     DeconvolutionOperatorTester()
15644       .input_size(kStridedInputHeight, kStridedInputWidth)
15645       .kernel_size(kernel_height, 2)
15646       .stride(kernel_height, 2)
15647       .group_input_channels(17)
15648       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15649       .iterations(3)
15650       .TestF32();
15651   }
15652 }
15653 
15654 TEST(DECONVOLUTION_NHWC_F32, 2xKs2xK) {
15655   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15656   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
15657     DeconvolutionOperatorTester()
15658       .input_size(kStridedInputHeight, kStridedInputWidth)
15659       .kernel_size(2, kernel_width)
15660       .stride(2, kernel_width)
15661       .group_input_channels(17)
15662       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15663       .iterations(3)
15664       .TestF32();
15665   }
15666 }
15667 
15668 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_height_adjustment) {
15669   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15670   DeconvolutionOperatorTester()
15671     .input_size(kStridedInputHeight, kStridedInputWidth)
15672     .adjustment_height(1)
15673     .kernel_size(2, 2)
15674     .stride(2)
15675     .group_input_channels(15)
15676     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15677     .iterations(1)
15678     .TestF32();
15679 }
15680 
15681 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_width_adjustment) {
15682   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15683   DeconvolutionOperatorTester()
15684     .input_size(kStridedInputHeight, kStridedInputWidth)
15685     .adjustment_width(1)
15686     .kernel_size(2, 2)
15687     .stride(2)
15688     .group_input_channels(15)
15689     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15690     .iterations(1)
15691     .TestF32();
15692 }
15693 
15694 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_input_height) {
15695   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15696   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15697     DeconvolutionOperatorTester()
15698       .input_size(input_height, kStridedInputWidth)
15699       .kernel_size(2, 2)
15700       .stride(2)
15701       .group_input_channels(15)
15702       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15703       .iterations(1)
15704       .TestF32();
15705   }
15706 }
15707 
15708 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_input_width) {
15709   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15710   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15711     DeconvolutionOperatorTester()
15712       .input_size(kStridedInputHeight, input_width)
15713       .kernel_size(2, 2)
15714       .stride(2)
15715       .group_input_channels(15)
15716       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15717       .iterations(1)
15718       .TestF32();
15719   }
15720 }
15721 
15722 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_input_channels) {
15723   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15724   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
15725     DeconvolutionOperatorTester()
15726       .input_size(kStridedInputHeight, kStridedInputWidth)
15727       .kernel_size(2, 2)
15728       .stride(2)
15729       .group_input_channels(input_channels)
15730       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15731       .iterations(1)
15732       .TestF32();
15733   }
15734 }
15735 
15736 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_varying_output_channels) {
15737   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15738   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15739     DeconvolutionOperatorTester()
15740       .input_size(kStridedInputHeight, kStridedInputWidth)
15741       .kernel_size(2, 2)
15742       .stride(2)
15743       .group_input_channels(23)
15744       .group_output_channels(output_channels)
15745       .iterations(1)
15746       .TestF32();
15747   }
15748 }
15749 
15750 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_input_stride) {
15751   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15752   DeconvolutionOperatorTester()
15753     .input_size(kStridedInputHeight, kStridedInputWidth)
15754     .kernel_size(2, 2)
15755     .stride(2)
15756     .group_input_channels(23)
15757     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15758     .input_pixel_stride(28)
15759     .iterations(3)
15760     .TestF32();
15761 }
15762 
15763 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_output_stride) {
15764   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15765   DeconvolutionOperatorTester()
15766     .input_size(kStridedInputHeight, kStridedInputWidth)
15767     .kernel_size(2, 2)
15768     .stride(2)
15769     .group_input_channels(23)
15770     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15771     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15772     .iterations(3)
15773     .TestF32();
15774 }
15775 
15776 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_qmin) {
15777   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15778   DeconvolutionOperatorTester()
15779     .input_size(kStridedInputHeight, kStridedInputWidth)
15780     .kernel_size(2, 2)
15781     .stride(2)
15782     .group_input_channels(23)
15783     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15784     .qmin(128)
15785     .iterations(3)
15786     .TestF32();
15787 }
15788 
15789 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_with_qmax) {
15790   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15791   DeconvolutionOperatorTester()
15792     .input_size(kStridedInputHeight, kStridedInputWidth)
15793     .kernel_size(2, 2)
15794     .stride(2)
15795     .group_input_channels(23)
15796     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15797     .qmax(128)
15798     .iterations(3)
15799     .TestF32();
15800 }
15801 
15802 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_without_bias) {
15803   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15804   DeconvolutionOperatorTester()
15805     .has_bias(false)
15806     .input_size(kStridedInputHeight, kStridedInputWidth)
15807     .kernel_size(2, 2)
15808     .stride(2)
15809     .group_input_channels(23)
15810     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15811     .iterations(3)
15812     .TestF32();
15813 }
15814 
15815 TEST(DECONVOLUTION_NHWC_F32, weights_cache_2x2s2) {
15816   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15817   DeconvolutionOperatorTester()
15818     .input_size(kStridedInputHeight, kStridedInputWidth)
15819     .kernel_size(2, 2)
15820     .stride(2)
15821     .group_input_channels(15)
15822     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15823     .use_weights_cache(true)
15824     .iterations(3)
15825     .TestF32();
15826 }
15827 
15828 /**************************** SUBCONV2D/GEMM path, grouped ****************************/
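// Note: grouped variants of the 2x2s2 tests. groups(2) with
// group_input_channels(17) makes 2 * 17 = 34 input channels in total, which is
// presumably why the with_input_stride test below picks input_pixel_stride(37):
// the pixel stride has to be at least groups * group_input_channels.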
15829 
15830 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2) {
15831   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15832   DeconvolutionOperatorTester()
15833     .input_size(kStridedInputHeight, kStridedInputWidth)
15834     .kernel_size(2, 2)
15835     .stride(2)
15836     .groups(2)
15837     .group_input_channels(17)
15838     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15839     .iterations(3)
15840     .TestF32();
15841 }
15842 
15843 TEST(DECONVOLUTION_NHWC_F32, grouped_Kx2sKx2) {
15844   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15845   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
15846     DeconvolutionOperatorTester()
15847       .input_size(kStridedInputHeight, kStridedInputWidth)
15848       .kernel_size(kernel_height, 2)
15849       .stride(kernel_height, 2)
15850       .groups(2)
15851       .group_input_channels(17)
15852       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15853       .iterations(3)
15854       .TestF32();
15855   }
15856 }
15857 
15858 TEST(DECONVOLUTION_NHWC_F32, grouped_2xKs2xK) {
15859   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15860   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
15861     DeconvolutionOperatorTester()
15862       .input_size(kStridedInputHeight, kStridedInputWidth)
15863       .kernel_size(2, kernel_width)
15864       .stride(2, kernel_width)
15865       .groups(2)
15866       .group_input_channels(17)
15867       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15868       .iterations(3)
15869       .TestF32();
15870   }
15871 }
15872 
15873 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_height_adjustment) {
15874   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15875   DeconvolutionOperatorTester()
15876     .input_size(kStridedInputHeight, kStridedInputWidth)
15877     .adjustment_height(1)
15878     .kernel_size(2, 2)
15879     .stride(2)
15880     .groups(2)
15881     .group_input_channels(17)
15882     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15883     .iterations(1)
15884     .TestF32();
15885 }
15886 
15887 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_width_adjustment) {
15888   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15889   DeconvolutionOperatorTester()
15890     .input_size(kStridedInputHeight, kStridedInputWidth)
15891     .adjustment_width(1)
15892     .kernel_size(2, 2)
15893     .stride(2)
15894     .groups(2)
15895     .group_input_channels(17)
15896     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15897     .iterations(1)
15898     .TestF32();
15899 }
15900 
15901 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_input_height) {
15902   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15903   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
15904     DeconvolutionOperatorTester()
15905       .input_size(input_height, kStridedInputWidth)
15906       .kernel_size(2, 2)
15907       .stride(2)
15908       .groups(2)
15909       .group_input_channels(17)
15910       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15911       .iterations(1)
15912       .TestF32();
15913   }
15914 }
15915 
15916 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_input_width) {
15917   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15918   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
15919     DeconvolutionOperatorTester()
15920       .input_size(kStridedInputHeight, input_width)
15921       .kernel_size(2, 2)
15922       .stride(2)
15923       .groups(2)
15924       .group_input_channels(17)
15925       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15926       .iterations(1)
15927       .TestF32();
15928   }
15929 }
15930 
15931 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_input_channels) {
15932   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15933   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
15934     DeconvolutionOperatorTester()
15935       .input_size(kStridedInputHeight, kStridedInputWidth)
15936       .kernel_size(2, 2)
15937       .stride(2)
15938       .groups(2)
15939       .group_input_channels(input_channels)
15940       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15941       .iterations(1)
15942       .TestF32();
15943   }
15944 }
15945 
15946 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_varying_output_channels) {
15947   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15948   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
15949     DeconvolutionOperatorTester()
15950       .input_size(kStridedInputHeight, kStridedInputWidth)
15951       .kernel_size(2, 2)
15952       .stride(2)
15953       .groups(2)
15954       .group_input_channels(17)
15955       .group_output_channels(output_channels)
15956       .iterations(1)
15957       .TestF32();
15958   }
15959 }
15960 
15961 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_input_stride) {
15962   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15963   DeconvolutionOperatorTester()
15964     .input_size(kStridedInputHeight, kStridedInputWidth)
15965     .kernel_size(2, 2)
15966     .stride(2)
15967     .groups(2)
15968     .group_input_channels(17)
15969     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15970     .input_pixel_stride(37)
15971     .iterations(3)
15972     .TestF32();
15973 }
15974 
15975 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_output_stride) {
15976   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15977   DeconvolutionOperatorTester()
15978     .input_size(kStridedInputHeight, kStridedInputWidth)
15979     .kernel_size(2, 2)
15980     .stride(2)
15981     .groups(2)
15982     .group_input_channels(17)
15983     .group_output_channels(xnn_params.f32.gemm.nr + 3)
15984     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
15985     .iterations(3)
15986     .TestF32();
15987 }
15988 
15989 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_qmin) {
15990   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15991   DeconvolutionOperatorTester()
15992     .input_size(kStridedInputHeight, kStridedInputWidth)
15993     .kernel_size(2, 2)
15994     .stride(2)
15995     .groups(2)
15996     .group_input_channels(17)
15997     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
15998     .qmin(128)
15999     .iterations(3)
16000     .TestF32();
16001 }
16002 
16003 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_with_qmax) {
16004   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16005   DeconvolutionOperatorTester()
16006     .input_size(kStridedInputHeight, kStridedInputWidth)
16007     .kernel_size(2, 2)
16008     .stride(2)
16009     .groups(2)
16010     .group_input_channels(17)
16011     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16012     .qmax(128)
16013     .iterations(3)
16014     .TestF32();
16015 }
16016 
16017 TEST(DECONVOLUTION_NHWC_F32, grouped_2x2s2_without_bias) {
16018   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16019   DeconvolutionOperatorTester()
16020     .has_bias(false)
16021     .input_size(kStridedInputHeight, kStridedInputWidth)
16022     .kernel_size(2, 2)
16023     .stride(2)
16024     .groups(2)
16025     .group_input_channels(17)
16026     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16027     .iterations(3)
16028     .TestF32();
16029 }
16030 
16031 TEST(DECONVOLUTION_NHWC_F32, weights_cache_grouped_2x2s2) {
16032   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16033   DeconvolutionOperatorTester()
16034     .input_size(kStridedInputHeight, kStridedInputWidth)
16035     .kernel_size(2, 2)
16036     .stride(2)
16037     .groups(2)
16038     .group_input_channels(17)
16039     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16040     .use_weights_cache(true)
16041     .iterations(3)
16042     .TestF32();
16043 }
16044 
16045 /**************************** SUBCONV2D/GEMM path, batched ****************************/
16046 
16047 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2) {
16048   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16049   DeconvolutionOperatorTester()
16050     .batch_size(2)
16051     .input_size(kStridedInputHeight, kStridedInputWidth)
16052     .kernel_size(2, 2)
16053     .stride(2)
16054     .group_input_channels(15)
16055     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16056     .iterations(3)
16057     .TestF32();
16058 }
16059 
16060 TEST(DECONVOLUTION_NHWC_F32, batched_Kx2sKx2) {
16061   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16062   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
16063     DeconvolutionOperatorTester()
16064       .batch_size(2)
16065       .input_size(kStridedInputHeight, kStridedInputWidth)
16066       .kernel_size(kernel_height, 2)
16067       .stride(kernel_height, 2)
16068       .group_input_channels(17)
16069       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16070       .iterations(3)
16071       .TestF32();
16072   }
16073 }
16074 
16075 TEST(DECONVOLUTION_NHWC_F32, batched_2xKs2xK) {
16076   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16077   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
16078     DeconvolutionOperatorTester()
16079       .batch_size(2)
16080       .input_size(kStridedInputHeight, kStridedInputWidth)
16081       .kernel_size(2, kernel_width)
16082       .stride(2, kernel_width)
16083       .group_input_channels(17)
16084       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16085       .iterations(3)
16086       .TestF32();
16087   }
16088 }
16089 
16090 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_height_adjustment) {
16091   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16092   DeconvolutionOperatorTester()
16093     .batch_size(2)
16094     .input_size(kStridedInputHeight, kStridedInputWidth)
16095     .adjustment_height(1)
16096     .kernel_size(2, 2)
16097     .stride(2)
16098     .group_input_channels(15)
16099     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16100     .iterations(1)
16101     .TestF32();
16102 }
16103 
16104 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_width_adjustment) {
16105   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16106   DeconvolutionOperatorTester()
16107     .batch_size(2)
16108     .input_size(kStridedInputHeight, kStridedInputWidth)
16109     .adjustment_width(1)
16110     .kernel_size(2, 2)
16111     .stride(2)
16112     .group_input_channels(15)
16113     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16114     .iterations(1)
16115     .TestF32();
16116 }
16117 
16118 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_input_height) {
16119   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16120   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
16121     DeconvolutionOperatorTester()
16122       .batch_size(2)
16123       .input_size(input_height, kStridedInputWidth)
16124       .kernel_size(2, 2)
16125       .stride(2)
16126       .group_input_channels(15)
16127       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16128       .iterations(1)
16129       .TestF32();
16130   }
16131 }
16132 
16133 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_input_width) {
16134   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16135   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
16136     DeconvolutionOperatorTester()
16137       .batch_size(2)
16138       .input_size(kStridedInputHeight, input_width)
16139       .kernel_size(2, 2)
16140       .stride(2)
16141       .group_input_channels(15)
16142       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16143       .iterations(1)
16144       .TestF32();
16145   }
16146 }
16147 
16148 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_input_channels) {
16149   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16150   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
16151     DeconvolutionOperatorTester()
16152       .batch_size(2)
16153       .input_size(kStridedInputHeight, kStridedInputWidth)
16154       .kernel_size(2, 2)
16155       .stride(2)
16156       .group_input_channels(input_channels)
16157       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16158       .iterations(1)
16159       .TestF32();
16160   }
16161 }
16162 
16163 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_varying_output_channels) {
16164   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16165   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
16166     DeconvolutionOperatorTester()
16167       .batch_size(2)
16168       .input_size(kStridedInputHeight, kStridedInputWidth)
16169       .kernel_size(2, 2)
16170       .stride(2)
16171       .group_input_channels(23)
16172       .group_output_channels(output_channels)
16173       .iterations(1)
16174       .TestF32();
16175   }
16176 }
16177 
16178 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_input_stride) {
16179   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16180   DeconvolutionOperatorTester()
16181     .batch_size(2)
16182     .input_size(kStridedInputHeight, kStridedInputWidth)
16183     .kernel_size(2, 2)
16184     .stride(2)
16185     .group_input_channels(23)
16186     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16187     .input_pixel_stride(28)
16188     .iterations(3)
16189     .TestF32();
16190 }
16191 
16192 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_output_stride) {
16193   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16194   DeconvolutionOperatorTester()
16195     .batch_size(2)
16196     .input_size(kStridedInputHeight, kStridedInputWidth)
16197     .kernel_size(2, 2)
16198     .stride(2)
16199     .group_input_channels(23)
16200     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16201     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
16202     .iterations(3)
16203     .TestF32();
16204 }
16205 
16206 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_qmin) {
16207   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16208   DeconvolutionOperatorTester()
16209     .batch_size(2)
16210     .input_size(kStridedInputHeight, kStridedInputWidth)
16211     .kernel_size(2, 2)
16212     .stride(2)
16213     .group_input_channels(23)
16214     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16215     .qmin(128)
16216     .iterations(3)
16217     .TestF32();
16218 }
16219 
16220 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_with_qmax) {
16221   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16222   DeconvolutionOperatorTester()
16223     .batch_size(2)
16224     .input_size(kStridedInputHeight, kStridedInputWidth)
16225     .kernel_size(2, 2)
16226     .stride(2)
16227     .group_input_channels(23)
16228     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16229     .qmax(128)
16230     .iterations(3)
16231     .TestF32();
16232 }
16233 
16234 TEST(DECONVOLUTION_NHWC_F32, batched_2x2s2_without_bias) {
16235   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16236   DeconvolutionOperatorTester()
16237     .has_bias(false)
16238     .batch_size(2)
16239     .input_size(kStridedInputHeight, kStridedInputWidth)
16240     .kernel_size(2, 2)
16241     .stride(2)
16242     .group_input_channels(23)
16243     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16244     .iterations(3)
16245     .TestF32();
16246 }
16247 
16248 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_2x2s2) {
16249   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16250   DeconvolutionOperatorTester()
16251     .batch_size(2)
16252     .input_size(kStridedInputHeight, kStridedInputWidth)
16253     .kernel_size(2, 2)
16254     .stride(2)
16255     .group_input_channels(15)
16256     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16257     .use_weights_cache(true)
16258     .iterations(3)
16259     .TestF32();
16260 }
16261 
16262 /**************************** SUBCONV2D/GEMM path, grouped, batched ****************************/
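// Note: batched + grouped combination of the 2x2s2 GEMM-path tests. In the
// with_output_stride test below the output pixel stride must cover all output
// channels across groups:
//   output_pixel_stride(nr * 2 + 13) >= groups(2) * group_output_channels(nr + 3) = 2 * nr + 6.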
16263 
16264 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2) {
16265   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16266   DeconvolutionOperatorTester()
16267     .batch_size(2)
16268     .input_size(kStridedInputHeight, kStridedInputWidth)
16269     .kernel_size(2, 2)
16270     .stride(2)
16271     .groups(2)
16272     .group_input_channels(17)
16273     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16274     .iterations(3)
16275     .TestF32();
16276 }
16277 
16278 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_Kx2sKx2) {
16279   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16280   for (size_t kernel_height = 3; kernel_height <= 5; kernel_height++) {
16281     DeconvolutionOperatorTester()
16282       .batch_size(2)
16283       .input_size(kStridedInputHeight, kStridedInputWidth)
16284       .kernel_size(kernel_height, 2)
16285       .stride(kernel_height, 2)
16286       .groups(2)
16287       .group_input_channels(17)
16288       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16289       .iterations(3)
16290       .TestF32();
16291   }
16292 }
16293 
16294 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2xKs2xK) {
16295   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16296   for (size_t kernel_width = 3; kernel_width <= 5; kernel_width++) {
16297     DeconvolutionOperatorTester()
16298       .batch_size(2)
16299       .input_size(kStridedInputHeight, kStridedInputWidth)
16300       .kernel_size(2, kernel_width)
16301       .stride(2, kernel_width)
16302       .groups(2)
16303       .group_input_channels(17)
16304       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16305       .iterations(3)
16306       .TestF32();
16307   }
16308 }
16309 
16310 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_height_adjustment) {
16311   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16312   DeconvolutionOperatorTester()
16313     .batch_size(2)
16314     .input_size(kStridedInputHeight, kStridedInputWidth)
16315     .adjustment_height(1)
16316     .kernel_size(2, 2)
16317     .stride(2)
16318     .groups(2)
16319     .group_input_channels(17)
16320     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16321     .iterations(1)
16322     .TestF32();
16323 }
16324 
16325 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_width_adjustment) {
16326   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16327   DeconvolutionOperatorTester()
16328     .batch_size(2)
16329     .input_size(kStridedInputHeight, kStridedInputWidth)
16330     .adjustment_width(1)
16331     .kernel_size(2, 2)
16332     .stride(2)
16333     .groups(2)
16334     .group_input_channels(17)
16335     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16336     .iterations(1)
16337     .TestF32();
16338 }
16339 
16340 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_input_height) {
16341   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16342   for (size_t input_height = kStridedInputHeight - 2; input_height <= kStridedInputHeight + 2; input_height++) {
16343     DeconvolutionOperatorTester()
16344       .batch_size(2)
16345       .input_size(input_height, kStridedInputWidth)
16346       .kernel_size(2, 2)
16347       .stride(2)
16348       .groups(2)
16349       .group_input_channels(17)
16350       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16351       .iterations(1)
16352       .TestF32();
16353   }
16354 }
16355 
16356 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_input_width) {
16357   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16358   for (size_t input_width = kStridedInputWidth - 2; input_width <= kStridedInputWidth + 2; input_width++) {
16359     DeconvolutionOperatorTester()
16360       .batch_size(2)
16361       .input_size(kStridedInputHeight, input_width)
16362       .kernel_size(2, 2)
16363       .stride(2)
16364       .groups(2)
16365       .group_input_channels(17)
16366       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16367       .iterations(1)
16368       .TestF32();
16369   }
16370 }
16371 
16372 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_input_channels) {
16373   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16374   for (size_t input_channels = 14; input_channels <= 20; input_channels++) {
16375     DeconvolutionOperatorTester()
16376       .batch_size(2)
16377       .input_size(kStridedInputHeight, kStridedInputWidth)
16378       .kernel_size(2, 2)
16379       .stride(2)
16380       .groups(2)
16381       .group_input_channels(input_channels)
16382       .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16383       .iterations(1)
16384       .TestF32();
16385   }
16386 }
16387 
16388 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_varying_output_channels) {
16389   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16390   for (size_t output_channels = 1; output_channels <= xnn_params.f32.gemm.nr * 2; output_channels *= 2) {
16391     DeconvolutionOperatorTester()
16392       .batch_size(2)
16393       .input_size(kStridedInputHeight, kStridedInputWidth)
16394       .kernel_size(2, 2)
16395       .stride(2)
16396       .groups(2)
16397       .group_input_channels(17)
16398       .group_output_channels(output_channels)
16399       .iterations(1)
16400       .TestF32();
16401   }
16402 }
16403 
16404 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_input_stride) {
16405   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16406   DeconvolutionOperatorTester()
16407     .batch_size(2)
16408     .input_size(kStridedInputHeight, kStridedInputWidth)
16409     .kernel_size(2, 2)
16410     .stride(2)
16411     .groups(2)
16412     .group_input_channels(17)
16413     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16414     .input_pixel_stride(37)
16415     .iterations(3)
16416     .TestF32();
16417 }
16418 
16419 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_output_stride) {
16420   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16421   DeconvolutionOperatorTester()
16422     .batch_size(2)
16423     .input_size(kStridedInputHeight, kStridedInputWidth)
16424     .kernel_size(2, 2)
16425     .stride(2)
16426     .groups(2)
16427     .group_input_channels(17)
16428     .group_output_channels(xnn_params.f32.gemm.nr + 3)
16429     .output_pixel_stride(xnn_params.f32.gemm.nr * 2 + 13)
16430     .iterations(3)
16431     .TestF32();
16432 }
16433 
16434 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_qmin) {
16435   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16436   DeconvolutionOperatorTester()
16437     .batch_size(2)
16438     .input_size(kStridedInputHeight, kStridedInputWidth)
16439     .kernel_size(2, 2)
16440     .stride(2)
16441     .groups(2)
16442     .group_input_channels(17)
16443     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16444     .qmin(128)
16445     .iterations(3)
16446     .TestF32();
16447 }
16448 
16449 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_with_qmax) {
16450   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16451   DeconvolutionOperatorTester()
16452     .batch_size(2)
16453     .input_size(kStridedInputHeight, kStridedInputWidth)
16454     .kernel_size(2, 2)
16455     .stride(2)
16456     .groups(2)
16457     .group_input_channels(17)
16458     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16459     .qmax(128)
16460     .iterations(3)
16461     .TestF32();
16462 }
16463 
16464 TEST(DECONVOLUTION_NHWC_F32, batched_grouped_2x2s2_without_bias) {
16465   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16466   DeconvolutionOperatorTester()
16467     .has_bias(false)
16468     .batch_size(2)
16469     .input_size(kStridedInputHeight, kStridedInputWidth)
16470     .kernel_size(2, 2)
16471     .stride(2)
16472     .groups(2)
16473     .group_input_channels(17)
16474     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16475     .iterations(3)
16476     .TestF32();
16477 }
16478 
16479 TEST(DECONVOLUTION_NHWC_F32, weights_cache_batched_grouped_2x2s2) {
16480   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16481   DeconvolutionOperatorTester()
16482     .batch_size(2)
16483     .input_size(kStridedInputHeight, kStridedInputWidth)
16484     .kernel_size(2, 2)
16485     .stride(2)
16486     .groups(2)
16487     .group_input_channels(17)
16488     .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
16489     .use_weights_cache(true)
16490     .iterations(3)
16491     .TestF32();
16492 }
16493 
16494 /**************************** SUBCONV2D/GEMM path, setup ****************************/
16495 
16496 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_setup_changing_batch) {
16497   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16498   DeconvolutionOperatorTester()
16499     .batch_size(2)
16500     .next_batch_size(5)
16501     .input_size(kStridedInputHeight, kStridedInputWidth)
16502     .kernel_size(2, 2)
16503     .stride(2)
16504     .groups(2)
16505     .group_input_channels(15)
16506     .group_output_channels(17)
16507     .TestSetupF32();
16508 }
16509 
16510 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_setup_changing_height) {
16511   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16512   DeconvolutionOperatorTester()
16513     .batch_size(2)
16514     .input_size(kStridedInputHeight, kStridedInputWidth)
16515     .next_input_height(kStridedInputHeight + 3)
16516     .kernel_size(2, 2)
16517     .stride(2)
16518     .groups(2)
16519     .group_input_channels(15)
16520     .group_output_channels(17)
16521     .TestSetupF32();
16522 }
16523 
16524 TEST(DECONVOLUTION_NHWC_F32, 2x2s2_setup_changing_width) {
16525   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
16526   DeconvolutionOperatorTester()
16527     .batch_size(2)
16528     .input_size(kStridedInputHeight, kStridedInputWidth)
16529     .next_input_width(kStridedInputWidth + 3)
16530     .kernel_size(2, 2)
16531     .stride(2)
16532     .groups(2)
16533     .group_input_channels(15)
16534     .group_output_channels(17)
16535     .TestSetupF32();
16536 }
16537