xref: /aosp_15_r20/external/XNNPACK/test/convolution-nchw.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <gtest/gtest.h>
7 
8 #include "convolution-operator-tester.h"
9 
10 
11 /**************************** SPMM path ****************************/
12 
13 TEST(CONVOLUTION_NCHW_F32, 1x1) {
14   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
15   ConvolutionOperatorTester()
16     .input_size(27, 29)
17     .kernel_size(1, 1)
18     .group_input_channels(23)
19     .group_output_channels(19)
20     .sparsity(0.5f)
21     .iterations(3)
22     .TestNCHWxF32();
23 }
24 
25 TEST(CONVOLUTION_NCHW_F32, 1x1_zero_weights) {
26   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
27   ConvolutionOperatorTester()
28     .input_size(27, 29)
29     .kernel_size(1, 1)
30     .group_input_channels(23)
31     .group_output_channels(19)
32     .sparsity(1.0f)
33     .iterations(3)
34     .TestNCHWxF32();
35 }
36 
37 TEST(CONVOLUTION_NCHW_F32, 1x1_varying_input_height) {
38   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
39   for (size_t input_height = 25; input_height <= 31; input_height++) {
40     ConvolutionOperatorTester()
41       .input_size(input_height, 29)
42       .kernel_size(1, 1)
43       .group_input_channels(23)
44       .group_output_channels(19)
45       .sparsity(0.5f)
46       .iterations(1)
47       .TestNCHWxF32();
48   }
49 }
50 
51 TEST(CONVOLUTION_NCHW_F32, 1x1_varying_input_width) {
52   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
53   for (size_t input_width = 27; input_width <= 33; input_width++) {
54     ConvolutionOperatorTester()
55       .input_size(27, input_width)
56       .kernel_size(1, 1)
57       .group_input_channels(23)
58       .group_output_channels(19)
59       .sparsity(0.5f)
60       .iterations(1)
61       .TestNCHWxF32();
62   }
63 }
64 
65 TEST(CONVOLUTION_NCHW_F32, 1x1_varying_input_channels) {
66   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
67   for (size_t input_channels = 1; input_channels <= 16; input_channels *= 4) {
68     ConvolutionOperatorTester()
69       .input_size(27, 29)
70       .kernel_size(1, 1)
71       .group_input_channels(input_channels)
72       .group_output_channels(19)
73       .sparsity(0.5f)
74       .iterations(1)
75       .TestNCHWxF32();
76   }
77 }
78 
79 TEST(CONVOLUTION_NCHW_F32, 1x1_varying_output_channels) {
80   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
81   for (size_t output_channels = 1; output_channels < 19; output_channels *= 2) {
82     ConvolutionOperatorTester()
83       .input_size(27, 29)
84       .kernel_size(1, 1)
85       .group_input_channels(23)
86       .group_output_channels(output_channels)
87       .sparsity(0.5f)
88       .iterations(1)
89       .TestNCHWxF32();
90   }
91 }
92 
93 TEST(CONVOLUTION_NCHW_F32, 1x1_with_qmin) {
94   ConvolutionOperatorTester()
95     .input_size(27, 29)
96     .kernel_size(1, 1)
97     .group_input_channels(23)
98     .group_output_channels(19)
99     .sparsity(0.5f)
100     .qmin(128)
101     .iterations(3)
102     .TestNCHWxF32();
103 }
104 
105 TEST(CONVOLUTION_NCHW_F32, 1x1_with_qmax) {
106   ConvolutionOperatorTester()
107     .input_size(27, 29)
108     .kernel_size(1, 1)
109     .group_input_channels(23)
110     .group_output_channels(19)
111     .sparsity(0.5f)
112     .qmax(128)
113     .iterations(3)
114     .TestNCHWxF32();
115 }
116 
117 TEST(CONVOLUTION_NCHW_F32, 1x1_without_bias) {
118   ConvolutionOperatorTester()
119     .has_bias(false)
120     .input_size(27, 29)
121     .kernel_size(1, 1)
122     .group_input_channels(23)
123     .group_output_channels(19)
124     .sparsity(0.5f)
125     .iterations(3)
126     .TestNCHWxF32();
127 }
128 
129 // Weights cache is not supported for SPMM microkernel, add a test here, but skip the assertions.
// Same configuration as the plain 1x1 case, with use_weights_cache(true).
TEST(CONVOLUTION_NCHW_F32, weights_cache_1x1) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.input_size(27, 29);
  tester.kernel_size(1, 1);
  tester.group_input_channels(23);
  tester.group_output_channels(19);
  tester.sparsity(0.5f);
  tester.use_weights_cache(true);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
142 
143 /**************************** SPMM path, batched ****************************/
144 
// Batch of 2: 1x1 kernel on a 27x29 (height x width) input, 23 -> 19 channels
// per group, sparsity 0.5, 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_1x1) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.kernel_size(1, 1);
  tester.group_input_channels(23);
  tester.group_output_channels(19);
  tester.sparsity(0.5f);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
157 
// Batch of 2, 1x1 kernel, with sparsity set to 1.0.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.kernel_size(1, 1);
  tester.group_input_channels(23);
  tester.group_output_channels(19);
  tester.sparsity(1.0f);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
170 
// Batch of 2, 1x1 kernel: sweeps the input height over 25..31 (inclusive)
// with the width fixed at 29; one iteration per height.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(height, 29);
    tester.kernel_size(1, 1);
    tester.group_input_channels(23);
    tester.group_output_channels(19);
    tester.sparsity(0.5f);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
185 
// Batch of 2, 1x1 kernel: sweeps the input width over 27..33 (inclusive)
// with the height fixed at 27; one iteration per width.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(27, width);
    tester.kernel_size(1, 1);
    tester.group_input_channels(23);
    tester.group_output_channels(19);
    tester.sparsity(0.5f);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
200 
// Batch of 2, 1x1 kernel: runs with 1, 4 and 16 input channels per group.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_varying_input_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t in_channels = 1; in_channels <= 16; in_channels *= 4) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(27, 29);
    tester.kernel_size(1, 1);
    tester.group_input_channels(in_channels);
    tester.group_output_channels(19);
    tester.sparsity(0.5f);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
215 
// Batch of 2, 1x1 kernel: runs with 1, 2, 4, 8 and 16 output channels per
// group (the count is doubled while it stays below 19).
TEST(CONVOLUTION_NCHW_F32, batched_1x1_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t out_channels = 1; out_channels < 19; out_channels *= 2) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(27, 29);
    tester.kernel_size(1, 1);
    tester.group_input_channels(23);
    tester.group_output_channels(out_channels);
    tester.sparsity(0.5f);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
230 
// Batch of 2, 1x1 kernel, 23 -> 19 channels per group, with the input channel
// stride set to 25 (larger than the 23 input channels); 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_with_input_stride) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(1, 1)
    .input_channel_stride(25)
    .group_input_channels(23)
    .group_output_channels(19)
    .sparsity(0.5f)
    .iterations(3)
    .TestNCHWxF32();
}
243 
// Batch of 2, 1x1 kernel, 23 -> 19 channels per group, with the output channel
// stride set to 21 (larger than the 19 output channels); 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_with_output_stride) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(1, 1)
    .output_channel_stride(21)
    .group_input_channels(23)
    .group_output_channels(19)
    .sparsity(0.5f)
    .iterations(3)
    .TestNCHWxF32();
}
256 
// Batch of 2, 1x1 kernel, 23 -> 19 channels per group, sparsity 0.5,
// with qmin set to 128; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_with_qmin) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(19)
    .sparsity(0.5f)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
269 
// Batch of 2, 1x1 kernel, 23 -> 19 channels per group, sparsity 0.5,
// with qmax set to 128; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_with_qmax) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(19)
    .sparsity(0.5f)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
282 
// Batch of 2, 1x1 kernel, 23 -> 19 channels per group, sparsity 0.5,
// with has_bias(false); 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_1x1_without_bias) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(1, 1)
    .group_input_channels(23)
    .group_output_channels(19)
    .sparsity(0.5f)
    .iterations(3)
    .TestNCHWxF32();
}
295 
296 /**************************** DConv 3x3c3s2 HWC->CHW path ****************************/
297 
298 TEST(CONVOLUTION_NHWC2NCHW_OP_F32, 3x3c3s2) {
299   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
300   ConvolutionOperatorTester()
301     .input_size(27, 29)
302     .padding(1)
303     .kernel_size(3, 3)
304     .subsampling(2)
305     .group_input_channels(3)
306     .group_output_channels(19)
307     .force_nhwc_input(true)
308     .iterations(3)
309     .TestNCHWxF32();
310 }
311 
312 TEST(CONVOLUTION_NHWC2NCHW_OP_F32, 3x3c3s2_varying_input_height) {
313   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
314   for (size_t input_height = 25; input_height <= 31; input_height++) {
315     ConvolutionOperatorTester()
316       .input_size(input_height, 29)
317       .padding(1)
318       .kernel_size(3, 3)
319       .subsampling(2)
320       .group_input_channels(3)
321       .group_output_channels(19)
322       .force_nhwc_input(true)
323       .iterations(1)
324       .TestNCHWxF32();
325   }
326 }
327 
328 TEST(CONVOLUTION_NHWC2NCHW_OP_F32, 3x3c3s2_varying_input_width) {
329   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
330   for (size_t input_width = 27; input_width <= 33; input_width++) {
331     ConvolutionOperatorTester()
332       .input_size(27, input_width)
333       .padding(1)
334       .kernel_size(3, 3)
335       .subsampling(2)
336       .group_input_channels(3)
337       .group_output_channels(19)
338       .force_nhwc_input(true)
339       .iterations(1)
340       .TestNCHWxF32();
341   }
342 }
343 
344 TEST(CONVOLUTION_NHWC2NCHW_OP_F32, 3x3c3s2_varying_output_channels) {
345   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
346   for (size_t output_channels = 1; output_channels < 19; output_channels *= 2) {
347     ConvolutionOperatorTester()
348       .input_size(27, 29)
349       .padding(1)
350       .kernel_size(3, 3)
351       .subsampling(2)
352       .group_input_channels(3)
353       .group_output_channels(output_channels)
354       .force_nhwc_input(true)
355       .iterations(1)
356       .TestNCHWxF32();
357   }
358 }
359 
360 TEST(CONVOLUTION_NHWC2NCHW_OP_F32, 3x3c3s2_with_qmin) {
361   ConvolutionOperatorTester()
362     .input_size(27, 29)
363     .padding(1)
364     .kernel_size(3, 3)
365     .subsampling(2)
366     .group_input_channels(3)
367     .group_output_channels(19)
368     .force_nhwc_input(true)
369     .qmin(128)
370     .iterations(3)
371     .TestNCHWxF32();
372 }
373 
374 TEST(CONVOLUTION_NHWC2NCHW_OP_F32, 3x3c3s2_with_qmax) {
375   ConvolutionOperatorTester()
376     .input_size(27, 29)
377     .padding(1)
378     .kernel_size(3, 3)
379     .subsampling(2)
380     .group_input_channels(3)
381     .group_output_channels(19)
382     .force_nhwc_input(true)
383     .qmax(128)
384     .iterations(3)
385     .TestNCHWxF32();
386 }
387 
388 TEST(CONVOLUTION_NHWC2NCHW_OP_F32, 3x3c3s2_without_bias) {
389   ConvolutionOperatorTester()
390     .has_bias(false)
391     .input_size(27, 29)
392     .padding(1)
393     .kernel_size(3, 3)
394     .subsampling(2)
395     .group_input_channels(3)
396     .group_output_channels(19)
397     .force_nhwc_input(true)
398     .iterations(3)
399     .TestNCHWxF32();
400 }
401 
// Same configuration as the plain 3x3c3s2 case, with use_weights_cache(true).
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, weights_cache_3x3c3s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.input_size(27, 29);
  tester.padding(1);
  tester.kernel_size(3, 3);
  tester.subsampling(2);
  tester.group_input_channels(3);
  tester.group_output_channels(19);
  tester.force_nhwc_input(true);
  tester.use_weights_cache(true);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
416 
417 /**************************** DConv 3x3c3s2 HWC->CHW path, batched ****************************/
418 
// Batch of 2: 3x3 kernel, padding 1, subsampling 2, 3 -> 19 channels per
// group on a 27x29 input, with force_nhwc_input(true); 3 iterations.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.padding(1);
  tester.kernel_size(3, 3);
  tester.subsampling(2);
  tester.group_input_channels(3);
  tester.group_output_channels(19);
  tester.force_nhwc_input(true);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
433 
// Batch of 2, 3x3c3s2 configuration: sweeps the input height over 25..31
// (inclusive) with the width fixed at 29; one iteration per height.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(height, 29);
    tester.padding(1);
    tester.kernel_size(3, 3);
    tester.subsampling(2);
    tester.group_input_channels(3);
    tester.group_output_channels(19);
    tester.force_nhwc_input(true);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
450 
// Batch of 2, 3x3c3s2 configuration: sweeps the input width over 27..33
// (inclusive) with the height fixed at 27; one iteration per width.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(27, width);
    tester.padding(1);
    tester.kernel_size(3, 3);
    tester.subsampling(2);
    tester.group_input_channels(3);
    tester.group_output_channels(19);
    tester.force_nhwc_input(true);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
467 
// Batch of 2, 3x3c3s2 configuration: runs with 1, 2, 4, 8 and 16 output
// channels per group (the count is doubled while it stays below 19).
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2_varying_output_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t out_channels = 1; out_channels < 19; out_channels *= 2) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(27, 29);
    tester.padding(1);
    tester.kernel_size(3, 3);
    tester.subsampling(2);
    tester.group_input_channels(3);
    tester.group_output_channels(out_channels);
    tester.force_nhwc_input(true);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
484 
// Batch of 2, 3x3c3s2 configuration with the output channel stride set to 21
// (larger than the 19 output channels); 3 iterations.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2_with_output_stride) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .padding(1)
    .kernel_size(3, 3)
    .subsampling(2)
    .output_channel_stride(21)
    .group_input_channels(3)
    .group_output_channels(19)
    .force_nhwc_input(true)
    .iterations(3)
    .TestNCHWxF32();
}
499 
// Batch of 2, 3x3c3s2 configuration with qmin set to 128; 3 iterations.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2_with_qmin) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .padding(1)
    .kernel_size(3, 3)
    .subsampling(2)
    .group_input_channels(3)
    .group_output_channels(19)
    .force_nhwc_input(true)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
514 
// Batch of 2, 3x3c3s2 configuration with qmax set to 128; 3 iterations.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2_with_qmax) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .padding(1)
    .kernel_size(3, 3)
    .subsampling(2)
    .group_input_channels(3)
    .group_output_channels(19)
    .force_nhwc_input(true)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
529 
// Batch of 2, 3x3c3s2 configuration with has_bias(false); 3 iterations.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2_without_bias) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .padding(1)
    .kernel_size(3, 3)
    .subsampling(2)
    .group_input_channels(3)
    .group_output_channels(19)
    .force_nhwc_input(true)
    .iterations(3)
    .TestNCHWxF32();
}
544 
// Batch of 2, 3x3c3s2 configuration with use_weights_cache(true);
// 3 iterations.
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, weights_cache_batched_3x3c3s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.padding(1);
  tester.kernel_size(3, 3);
  tester.subsampling(2);
  tester.group_input_channels(3);
  tester.group_output_channels(19);
  tester.force_nhwc_input(true);
  tester.use_weights_cache(true);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
560 
561 /**************************** DWCONV 3x3 path ****************************/
562 
// 3x3 kernel with padding 1 and 19 groups on a 27x29 (height x width) input;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.groups(19);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
573 
// 3x3 kernel, padding 1, 19 groups, with sparsity set to 1.0; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.groups(19);
  tester.sparsity(1.0f);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
585 
// 3x3 kernel, padding 1, 19 groups: sweeps the input height over 25..31
// (inclusive) with the width fixed at 29; one iteration per height.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester.input_size(height, 29);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.groups(19);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
598 
// 3x3 kernel, padding 1, 19 groups: sweeps the input width over 27..33
// (inclusive) with the height fixed at 27; one iteration per width.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester.input_size(27, width);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.groups(19);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
611 
// 3x3 kernel, padding 1: runs with the group count set to 1, 4 and 16.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t group_count = 1; group_count <= 16; group_count *= 4) {
    ConvolutionOperatorTester tester{};
    tester.input_size(27, 29);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.groups(group_count);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
624 
// 3x3 kernel, padding 1, 19 groups, with qmin set to 128; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3_with_qmin) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
635 
// 3x3 kernel, padding 1, 19 groups, with qmax set to 128; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3_with_qmax) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
646 
// 3x3 kernel, padding 1, 19 groups, with has_bias(false); 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3_without_bias) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
657 
// 3x3 kernel, padding 1, 19 groups, with use_weights_cache(true);
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, weights_cache_depthwise_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.groups(19);
  tester.use_weights_cache(true);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
669 
670 /**************************** DWCONV 3x3 path, batched ****************************/
671 
// Batch of 2: 3x3 kernel, padding 1, 19 groups on a 27x29 input;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.groups(19);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
683 
// Batch of 2: 3x3 kernel, padding 1, 19 groups, with sparsity set to 1.0;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.groups(19);
  tester.sparsity(1.0f);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
696 
// Batch of 2, 3x3 kernel, padding 1, 19 groups: sweeps the input height over
// 25..31 (inclusive) with the width fixed at 29; one iteration per height.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(height, 29);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.groups(19);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
710 
// Batch of 2, 3x3 kernel, padding 1, 19 groups: sweeps the input width over
// 27..33 (inclusive) with the height fixed at 27; one iteration per width.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(27, width);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.groups(19);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
724 
// Batch of 2, 3x3 kernel, padding 1: runs with the group count set to
// 1, 4 and 16.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t group_count = 1; group_count <= 16; group_count *= 4) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(27, 29);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.groups(group_count);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
738 
// Batch of 2, 3x3 kernel, padding 1, 19 groups, with the input channel stride
// set to 21 (larger than the 19 groups); 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_with_input_stride) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .input_channel_stride(21)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
750 
// Batch of 2, 3x3 kernel, padding 1, 19 groups, with the output channel stride
// set to 23 (larger than the 19 groups); 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_with_output_stride) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .output_channel_stride(23)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
762 
// Batch of 2, 3x3 kernel, padding 1, 19 groups, with qmin set to 128;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_with_qmin) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
774 
// Batch of 2, 3x3 kernel, padding 1, 19 groups, with qmax set to 128;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_with_qmax) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
786 
// Batch of 2, 3x3 kernel, padding 1, 19 groups, with has_bias(false);
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3_without_bias) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
798 
// Batch of 2, 3x3 kernel, padding 1, 19 groups, with use_weights_cache(true);
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, weights_cache_batched_depthwise_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.groups(19);
  tester.use_weights_cache(true);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
811 
812 /**************************** DWCONV 3x3 stride-2 path ****************************/
813 
// 3x3 kernel, padding 1, subsampling 2, 19 groups on a 27x29
// (height x width) input; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.subsampling(2);
  tester.groups(19);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
825 
// 3x3 kernel, padding 1, subsampling 2, 19 groups, with sparsity set to 1.0;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.subsampling(2);
  tester.groups(19);
  tester.sparsity(1.0f);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
838 
// 3x3 kernel, padding 1, subsampling 2, 19 groups: sweeps the input height
// over 25..31 (inclusive) with the width fixed at 29; one iteration each.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester.input_size(height, 29);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.subsampling(2);
    tester.groups(19);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
852 
// 3x3 kernel, padding 1, subsampling 2, 19 groups: sweeps the input width
// over 27..33 (inclusive) with the height fixed at 27; one iteration each.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester.input_size(27, width);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.subsampling(2);
    tester.groups(19);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
866 
// 3x3 kernel, padding 1, subsampling 2: runs with the group count set to
// 1, 4 and 16.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t group_count = 1; group_count <= 16; group_count *= 4) {
    ConvolutionOperatorTester tester{};
    tester.input_size(27, 29);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.subsampling(2);
    tester.groups(group_count);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
880 
// 3x3 kernel, padding 1, subsampling 2, 19 groups, with qmin set to 128;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2_with_qmin) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
892 
// 3x3 kernel, padding 1, subsampling 2, 19 groups, with qmax set to 128;
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2_with_qmax) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
904 
// 3x3 kernel, padding 1, subsampling 2, 19 groups, with has_bias(false);
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_3x3s2_without_bias) {
  // Initialize XNNPACK explicitly, as the sibling tests do, so this test does
  // not depend on initialization performed elsewhere.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
916 
917 /**************************** DWCONV 3x3 stride-2 path, batched ****************************/
918 
// Batch of 2: 3x3 kernel, padding 1, subsampling 2, 19 groups on a 27x29
// input; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.subsampling(2);
  tester.groups(19);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
931 
// Batch of 2: 3x3 kernel, padding 1, subsampling 2, 19 groups, with sparsity
// set to 1.0; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester.batch_size(2);
  tester.input_size(27, 29);
  tester.kernel_size(3, 3);
  tester.padding(1);
  tester.subsampling(2);
  tester.groups(19);
  tester.sparsity(1.0f);
  tester.iterations(3);
  tester.TestNCHWxF32();
}
945 
// Batch of 2, 3x3 kernel, padding 1, subsampling 2, 19 groups: sweeps the
// input height over 25..31 (inclusive) with the width fixed at 29.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester.batch_size(2);
    tester.input_size(height, 29);
    tester.kernel_size(3, 3);
    tester.padding(1);
    tester.subsampling(2);
    tester.groups(19);
    tester.iterations(1);
    tester.TestNCHWxF32();
  }
}
960 
// Sweeps input widths 27..33 (inclusive) at a fixed height of 27: batch of 2,
// 3x3 kernel, padding 1, subsampling 2, 19 groups; 1 iteration per width.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(27, width)
      .kernel_size(3, 3)
      .padding(1)
      .subsampling(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
975 
// Runs the batched 3x3 stride-2 configuration (batch 2, 27x29 input, padding 1,
// subsampling 2) with groups set to 1, 4 and 16 in turn; 1 iteration each.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(27, 29)
      .kernel_size(3, 3)
      .padding(1)
      .subsampling(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
990 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 3x3 kernel, padding 1, subsampling 2, input_channel_stride(21) and 19 groups,
// for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_with_input_stride) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .input_channel_stride(21)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1003 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 3x3 kernel, padding 1, subsampling 2, output_channel_stride(23) and
// 19 groups, for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_with_output_stride) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .output_channel_stride(23)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1016 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 3x3 kernel, padding 1, subsampling 2, 19 groups and qmin(128), for
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_with_qmin) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
1029 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 3x3 kernel, padding 1, subsampling 2, 19 groups and qmax(128), for
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_with_qmax) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
1042 
// Runs TestNCHWxF32 with has_bias(false) and batch size 2 on a 27x29
// (height x width) input with a 3x3 kernel, padding 1, subsampling 2 and
// 19 groups, for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_3x3s2_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1055 
1056 /**************************** DWCONV 5x5 path ****************************/
1057 
// 27x29 (height x width) input, 5x5 kernel, padding 2, 19 groups;
// 3 iterations of TestNCHWxF32.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1068 
// 5x5 configuration (27x29 input, padding 2, 19 groups) with sparsity(1.0f),
// which the test name labels as the zero-weights configuration; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .sparsity(1.0f)
    .iterations(3)
    .TestNCHWxF32();
}
1080 
// Sweeps input heights 25..31 (inclusive) at a fixed width of 29: 5x5 kernel,
// padding 2, 19 groups; 1 iteration per height.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester
      .input_size(height, 29)
      .kernel_size(5, 5)
      .padding(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1093 
// Sweeps input widths 27..33 (inclusive) at a fixed height of 27: 5x5 kernel,
// padding 2, 19 groups; 1 iteration per width.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester
      .input_size(27, width)
      .kernel_size(5, 5)
      .padding(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1106 
// Runs the 5x5 configuration (27x29 input, padding 2) with groups set to
// 1, 4 and 16 in turn; 1 iteration each.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .input_size(27, 29)
      .kernel_size(5, 5)
      .padding(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1119 
// Runs TestNCHWxF32 on a 27x29 (height x width) input with a 5x5 kernel,
// padding 2, 19 groups and qmin(128), for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5_with_qmin) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
1130 
// Runs TestNCHWxF32 on a 27x29 (height x width) input with a 5x5 kernel,
// padding 2, 19 groups and qmax(128), for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5_with_qmax) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
1141 
// Runs TestNCHWxF32 with has_bias(false) on a 27x29 (height x width) input with
// a 5x5 kernel, padding 2 and 19 groups, for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1152 
1153 /**************************** DWCONV 5x5 path, batched ****************************/
1154 
// Batch of 2, 27x29 (height x width) input, 5x5 kernel, padding 2, 19 groups;
// 3 iterations of TestNCHWxF32.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1166 
// Batched 5x5 configuration (batch 2, 27x29 input, padding 2, 19 groups) with
// sparsity(1.0f), which the test name labels as the zero-weights
// configuration; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .sparsity(1.0f)
    .iterations(3)
    .TestNCHWxF32();
}
1179 
// Sweeps input heights 25..31 (inclusive) at a fixed width of 29: batch of 2,
// 5x5 kernel, padding 2, 19 groups; 1 iteration per height.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(height, 29)
      .kernel_size(5, 5)
      .padding(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1193 
// Sweeps input widths 27..33 (inclusive) at a fixed height of 27: batch of 2,
// 5x5 kernel, padding 2, 19 groups; 1 iteration per width.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(27, width)
      .kernel_size(5, 5)
      .padding(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1207 
// Runs the batched 5x5 configuration (batch 2, 27x29 input, padding 2) with
// groups set to 1, 4 and 16 in turn; 1 iteration each.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(27, 29)
      .kernel_size(5, 5)
      .padding(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1221 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, input_channel_stride(21) and 19 groups, for
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_with_input_stride) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .input_channel_stride(21)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1233 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, output_channel_stride(23) and 19 groups, for
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_with_output_stride) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .output_channel_stride(23)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1245 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, 19 groups and qmin(128), for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_with_qmin) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
1257 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, 19 groups and qmax(128), for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_with_qmax) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
1269 
// Runs TestNCHWxF32 with has_bias(false) and batch size 2 on a 27x29
// (height x width) input with a 5x5 kernel, padding 2 and 19 groups, for
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1281 
1282 /**************************** DWCONV 5x5 stride-2 path ****************************/
1283 
// 27x29 (height x width) input, 5x5 kernel, padding 2, subsampling 2,
// 19 groups; 3 iterations of TestNCHWxF32.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1295 
// 5x5 stride-2 configuration (27x29 input, padding 2, subsampling 2,
// 19 groups) with sparsity(1.0f), which the test name labels as the
// zero-weights configuration; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .sparsity(1.0f)
    .iterations(3)
    .TestNCHWxF32();
}
1308 
// Sweeps input heights 25..31 (inclusive) at a fixed width of 29: 5x5 kernel,
// padding 2, subsampling 2, 19 groups; 1 iteration per height.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester
      .input_size(height, 29)
      .kernel_size(5, 5)
      .padding(2)
      .subsampling(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1322 
// Sweeps input widths 27..33 (inclusive) at a fixed height of 27: 5x5 kernel,
// padding 2, subsampling 2, 19 groups; 1 iteration per width.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester
      .input_size(27, width)
      .kernel_size(5, 5)
      .padding(2)
      .subsampling(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1336 
// Runs the 5x5 stride-2 configuration (27x29 input, padding 2, subsampling 2)
// with groups set to 1, 4 and 16 in turn; 1 iteration each.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .input_size(27, 29)
      .kernel_size(5, 5)
      .padding(2)
      .subsampling(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1350 
// Runs TestNCHWxF32 on a 27x29 (height x width) input with a 5x5 kernel,
// padding 2, subsampling 2, 19 groups and qmin(128), for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2_with_qmin) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
1362 
// Runs TestNCHWxF32 on a 27x29 (height x width) input with a 5x5 kernel,
// padding 2, subsampling 2, 19 groups and qmax(128), for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2_with_qmax) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
1374 
// Runs TestNCHWxF32 with has_bias(false) on a 27x29 (height x width) input with
// a 5x5 kernel, padding 2, subsampling 2 and 19 groups, for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, depthwise_5x5s2_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1386 
1387 /**************************** DWCONV 5x5 stride-2 path, batched ****************************/
1388 
// Batch of 2, 27x29 (height x width) input, 5x5 kernel, padding 2,
// subsampling 2, 19 groups; 3 iterations of TestNCHWxF32.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1401 
// Batched 5x5 stride-2 configuration (batch 2, 27x29 input, padding 2,
// subsampling 2, 19 groups) with sparsity(1.0f), which the test name labels as
// the zero-weights configuration; 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_zero_weights) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .sparsity(1.0f)
    .iterations(3)
    .TestNCHWxF32();
}
1415 
// Sweeps input heights 25..31 (inclusive) at a fixed width of 29: batch of 2,
// 5x5 kernel, padding 2, subsampling 2, 19 groups; 1 iteration per height.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_varying_input_height) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t height = 25; height < 32; ++height) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(height, 29)
      .kernel_size(5, 5)
      .padding(2)
      .subsampling(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1430 
// Sweeps input widths 27..33 (inclusive) at a fixed height of 27: batch of 2,
// 5x5 kernel, padding 2, subsampling 2, 19 groups; 1 iteration per width.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_varying_input_width) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  for (size_t width = 27; width < 34; ++width) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(27, width)
      .kernel_size(5, 5)
      .padding(2)
      .subsampling(2)
      .groups(19)
      .iterations(1)
      .TestNCHWxF32();
  }
}
1445 
// Runs the batched 5x5 stride-2 configuration (batch 2, 27x29 input, padding 2,
// subsampling 2) with groups set to 1, 4 and 16 in turn; 1 iteration each.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .batch_size(2)
      .input_size(27, 29)
      .kernel_size(5, 5)
      .padding(2)
      .subsampling(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1460 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, subsampling 2, input_channel_stride(21) and 19 groups,
// for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_with_input_stride) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .input_channel_stride(21)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1473 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, subsampling 2, output_channel_stride(23) and
// 19 groups, for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_with_output_stride) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .output_channel_stride(23)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1486 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, subsampling 2, 19 groups and qmin(128), for
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_with_qmin) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .qmin(128)
    .iterations(3)
    .TestNCHWxF32();
}
1499 
// Runs TestNCHWxF32 with batch size 2 on a 27x29 (height x width) input with a
// 5x5 kernel, padding 2, subsampling 2, 19 groups and qmax(128), for
// 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_with_qmax) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .qmax(128)
    .iterations(3)
    .TestNCHWxF32();
}
1512 
// Runs TestNCHWxF32 with has_bias(false) and batch size 2 on a 27x29
// (height x width) input with a 5x5 kernel, padding 2, subsampling 2 and
// 19 groups, for 3 iterations.
TEST(CONVOLUTION_NCHW_F32, batched_depthwise_5x5s2_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1525 
1526 /**************************** DWCONV 3x3 path ****************************/
1527 
1528 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 3x3) {
1529   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1530   ConvolutionOperatorTester()
1531     .depthwise_layout(true)
1532     .input_size(27, 29)
1533     .kernel_size(3, 3)
1534     .padding(1)
1535     .groups(19)
1536     .iterations(3)
1537     .TestNCHWxF32();
1538 }
1539 
1540 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 3x3_varying_channels) {
1541   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1542   for (size_t channels = 1; channels <= 16; channels *= 4) {
1543     ConvolutionOperatorTester()
1544       .depthwise_layout(true)
1545       .input_size(27, 29)
1546       .kernel_size(3, 3)
1547       .padding(1)
1548       .groups(channels)
1549       .iterations(1)
1550       .TestNCHWxF32();
1551   }
1552 }
1553 
1554 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 3x3_without_bias) {
1555   ConvolutionOperatorTester()
1556     .depthwise_layout(true)
1557     .has_bias(false)
1558     .input_size(27, 29)
1559     .kernel_size(3, 3)
1560     .padding(1)
1561     .groups(19)
1562     .iterations(3)
1563     .TestNCHWxF32();
1564 }
1565 
// Depthwise-layout 3x3 configuration (27x29 input, padding 1, 19 groups) with
// use_weights_cache(true); 3 iterations of TestNCHWxF32.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, weights_cache_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .depthwise_layout(true)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .use_weights_cache(true)
    .iterations(3)
    .TestNCHWxF32();
}
1578 
1579 /**************************** DWCONV 3x3 path, batched ****************************/
1580 
// depthwise_layout(true), batch of 2, 27x29 (height x width) input, 3x3 kernel,
// padding 1, 19 groups; 3 iterations of TestNCHWxF32.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .depthwise_layout(true)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1593 
// Runs the batched depthwise-layout 3x3 configuration (batch 2, 27x29 input,
// padding 1) with groups set to 1, 4 and 16 in turn; 1 iteration each.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_3x3_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .depthwise_layout(true)
      .batch_size(2)
      .input_size(27, 29)
      .kernel_size(3, 3)
      .padding(1)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1608 
// Runs TestNCHWxF32 with depthwise_layout(true), has_bias(false) and batch
// size 2 on a 27x29 (height x width) input with a 3x3 kernel, padding 1 and
// 19 groups, for 3 iterations.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_3x3_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .depthwise_layout(true)
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1621 
// Batched depthwise-layout 3x3 configuration (batch 2, 27x29 input, padding 1,
// 19 groups) with use_weights_cache(true); 3 iterations of TestNCHWxF32.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, weights_cache_batched_3x3) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .depthwise_layout(true)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .groups(19)
    .use_weights_cache(true)
    .iterations(3)
    .TestNCHWxF32();
}
1635 
1636 /**************************** DWCONV 3x3 stride-2 path ****************************/
1637 
1638 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 3x3s2) {
1639   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1640   ConvolutionOperatorTester()
1641     .depthwise_layout(true)
1642     .input_size(27, 29)
1643     .kernel_size(3, 3)
1644     .padding(1)
1645     .subsampling(2)
1646     .groups(19)
1647     .iterations(3)
1648     .TestNCHWxF32();
1649 }
1650 
1651 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 3x3s2_varying_channels) {
1652   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1653   for (size_t channels = 1; channels <= 16; channels *= 4) {
1654     ConvolutionOperatorTester()
1655       .depthwise_layout(true)
1656       .input_size(27, 29)
1657       .kernel_size(3, 3)
1658       .padding(1)
1659       .subsampling(2)
1660       .groups(channels)
1661       .iterations(1)
1662       .TestNCHWxF32();
1663   }
1664 }
1665 
1666 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 3x3s2_without_bias) {
1667   ConvolutionOperatorTester()
1668     .depthwise_layout(true)
1669     .has_bias(false)
1670     .input_size(27, 29)
1671     .kernel_size(3, 3)
1672     .padding(1)
1673     .subsampling(2)
1674     .groups(19)
1675     .iterations(3)
1676     .TestNCHWxF32();
1677 }
1678 
1679 /**************************** DWCONV 3x3 stride-2 path, batched ****************************/
1680 
// depthwise_layout(true), batch of 2, 27x29 (height x width) input, 3x3 kernel,
// padding 1, subsampling 2, 19 groups; 3 iterations of TestNCHWxF32.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_3x3s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .depthwise_layout(true)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1694 
// Runs the batched depthwise-layout 3x3 stride-2 configuration (batch 2, 27x29
// input, padding 1, subsampling 2) with groups set to 1, 4 and 16 in turn;
// 1 iteration each.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_3x3s2_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .depthwise_layout(true)
      .batch_size(2)
      .input_size(27, 29)
      .kernel_size(3, 3)
      .padding(1)
      .subsampling(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1710 
// Runs TestNCHWxF32 with depthwise_layout(true), has_bias(false) and batch
// size 2 on a 27x29 (height x width) input with a 3x3 kernel, padding 1,
// subsampling 2 and 19 groups, for 3 iterations.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_3x3s2_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .depthwise_layout(true)
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(3, 3)
    .padding(1)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1724 
1725 /**************************** DWCONV 5x5 path ****************************/
1726 
1727 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 5x5) {
1728   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1729   ConvolutionOperatorTester()
1730     .depthwise_layout(true)
1731     .input_size(27, 29)
1732     .kernel_size(5, 5)
1733     .padding(2)
1734     .groups(19)
1735     .iterations(3)
1736     .TestNCHWxF32();
1737 }
1738 
1739 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 5x5_varying_channels) {
1740   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1741   for (size_t channels = 1; channels <= 16; channels *= 4) {
1742     ConvolutionOperatorTester()
1743       .depthwise_layout(true)
1744       .input_size(27, 29)
1745       .kernel_size(5, 5)
1746       .padding(2)
1747       .groups(channels)
1748       .iterations(1)
1749       .TestNCHWxF32();
1750   }
1751 }
1752 
1753 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 5x5_without_bias) {
1754   ConvolutionOperatorTester()
1755     .depthwise_layout(true)
1756     .has_bias(false)
1757     .input_size(27, 29)
1758     .kernel_size(5, 5)
1759     .padding(2)
1760     .groups(19)
1761     .iterations(3)
1762     .TestNCHWxF32();
1763 }
1764 
1765 /**************************** DWCONV 5x5 path, batched ****************************/
1766 
// depthwise_layout(true), batch of 2, 27x29 (height x width) input, 5x5 kernel,
// padding 2, 19 groups; 3 iterations of TestNCHWxF32.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_5x5) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .depthwise_layout(true)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1779 
// Runs the batched depthwise-layout 5x5 configuration (batch 2, 27x29 input,
// padding 2) with groups set to 1, 4 and 16 in turn; 1 iteration each.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_5x5_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .depthwise_layout(true)
      .batch_size(2)
      .input_size(27, 29)
      .kernel_size(5, 5)
      .padding(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1794 
// Runs TestNCHWxF32 with depthwise_layout(true), has_bias(false) and batch
// size 2 on a 27x29 (height x width) input with a 5x5 kernel, padding 2 and
// 19 groups, for 3 iterations.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_5x5_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .depthwise_layout(true)
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1807 
1808 /**************************** DWCONV 5x5 stride-2 path ****************************/
1809 
1810 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 5x5s2) {
1811   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1812   ConvolutionOperatorTester()
1813     .depthwise_layout(true)
1814     .input_size(27, 29)
1815     .kernel_size(5, 5)
1816     .padding(2)
1817     .subsampling(2)
1818     .groups(19)
1819     .iterations(3)
1820     .TestNCHWxF32();
1821 }
1822 
1823 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 5x5s2_varying_channels) {
1824   ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1825   for (size_t channels = 1; channels <= 16; channels *= 4) {
1826     ConvolutionOperatorTester()
1827       .depthwise_layout(true)
1828       .input_size(27, 29)
1829       .kernel_size(5, 5)
1830       .padding(2)
1831       .subsampling(2)
1832       .groups(channels)
1833       .iterations(1)
1834       .TestNCHWxF32();
1835   }
1836 }
1837 
1838 TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, 5x5s2_without_bias) {
1839   ConvolutionOperatorTester()
1840     .depthwise_layout(true)
1841     .has_bias(false)
1842     .input_size(27, 29)
1843     .kernel_size(5, 5)
1844     .padding(2)
1845     .subsampling(2)
1846     .groups(19)
1847     .iterations(3)
1848     .TestNCHWxF32();
1849 }
1850 
1851 /**************************** DWCONV 5x5 stride-2 path, batched ****************************/
1852 
// depthwise_layout(true), batch of 2, 27x29 (height x width) input, 5x5 kernel,
// padding 2, subsampling 2, 19 groups; 3 iterations of TestNCHWxF32.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_5x5s2) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  ConvolutionOperatorTester tester{};
  tester
    .depthwise_layout(true)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1866 
// Runs the batched depthwise-layout 5x5 stride-2 configuration (batch 2, 27x29
// input, padding 2, subsampling 2) with groups set to 1, 4 and 16 in turn;
// 1 iteration each.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_5x5s2_varying_channels) {
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));

  size_t group_count = 1;
  while (group_count <= 16) {
    ConvolutionOperatorTester tester{};
    tester
      .depthwise_layout(true)
      .batch_size(2)
      .input_size(27, 29)
      .kernel_size(5, 5)
      .padding(2)
      .subsampling(2)
      .groups(group_count)
      .iterations(1)
      .TestNCHWxF32();
    group_count *= 4;  // visits 1, 4, 16
  }
}
1882 
// Runs TestNCHWxF32 with depthwise_layout(true), has_bias(false) and batch
// size 2 on a 27x29 (height x width) input with a 5x5 kernel, padding 2,
// subsampling 2 and 19 groups, for 3 iterations.
TEST(DEPTHWISE_CONVOLUTION_NCHW_F32, batched_5x5s2_without_bias) {
  // Initialize XNNPACK up front, as the sibling tests in this file do, so this
  // test does not depend on another test having initialized the library.
  ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
  ConvolutionOperatorTester()
    .depthwise_layout(true)
    .has_bias(false)
    .batch_size(2)
    .input_size(27, 29)
    .kernel_size(5, 5)
    .padding(2)
    .subsampling(2)
    .groups(19)
    .iterations(3)
    .TestNCHWxF32();
}
1896