1 // Copyright 2020 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 // 6 // Auto-generated file. Do not edit! 7 // Specification: test/f16-dwconv2d-chw.yaml 8 // Generator: tools/generate-dwconv2d-chw-test.py 9 10 11 #include <gtest/gtest.h> 12 13 #include <xnnpack/common.h> 14 #include <xnnpack/isa-checks.h> 15 16 #include <xnnpack/dwconv.h> 17 #include "dwconv2d-microkernel-tester.h" 18 19 20 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8,output_width_eq_8)21 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8, output_width_eq_8) { 22 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 23 DWConv2DMicrokernelTester() 24 .input_width(8) 25 .input_height(1) 26 .kernel_height(3) 27 .kernel_width(3) 28 .subsampling(1) 29 .padding_left(1) 30 .padding_right(1) 31 .padding_top(1) 32 .padding_bottom(1) 33 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8, xnn_init_f16_chw_params); 34 } 35 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8,output_width_div_8)36 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8, output_width_div_8) { 37 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 38 for (size_t input_width = 16; input_width < 64; input_width += 8) { 39 DWConv2DMicrokernelTester() 40 .input_width(input_width) 41 .input_height(1) 42 .kernel_height(3) 43 .kernel_width(3) 44 .subsampling(1) 45 .padding_left(1) 46 .padding_right(1) 47 .padding_top(1) 48 .padding_bottom(1) 49 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8, xnn_init_f16_chw_params); 50 } 51 } 52 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8,output_width_lt_8)53 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8, output_width_lt_8) { 54 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 55 for (size_t input_width = 1; input_width < 8; input_width++) { 56 DWConv2DMicrokernelTester() 57 .input_width(8) 58 .input_height(1) 59 .kernel_height(3) 60 .kernel_width(3) 61 .subsampling(1) 62 .padding_left(1) 63 
.padding_right(1) 64 .padding_top(1) 65 .padding_bottom(1) 66 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8, xnn_init_f16_chw_params); 67 } 68 } 69 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8,output_width_gt_8)70 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8, output_width_gt_8) { 71 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 72 for (size_t input_width = 9; input_width < 17; input_width++) { 73 DWConv2DMicrokernelTester() 74 .input_width(input_width) 75 .input_height(1) 76 .kernel_height(3) 77 .kernel_width(3) 78 .subsampling(1) 79 .padding_left(1) 80 .padding_right(1) 81 .padding_top(1) 82 .padding_bottom(1) 83 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8, xnn_init_f16_chw_params); 84 } 85 } 86 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8,output_height_gt_1)87 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8, output_height_gt_1) { 88 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 89 for (size_t input_height = 2; input_height < 3; input_height++) { 90 for (size_t input_width = 1; input_width < 41; input_width += 7) { 91 DWConv2DMicrokernelTester() 92 .input_width(input_width) 93 .input_height(input_height) 94 .kernel_height(3) 95 .kernel_width(3) 96 .subsampling(1) 97 .padding_left(1) 98 .padding_right(1) 99 .padding_top(1) 100 .padding_bottom(1) 101 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8, xnn_init_f16_chw_params); 102 } 103 } 104 } 105 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 106 107 108 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2,output_width_eq_8)109 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2, output_width_eq_8) { 110 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 111 DWConv2DMicrokernelTester() 112 .input_width(8) 113 .input_height(1) 114 .kernel_height(3) 115 .kernel_width(3) 116 .subsampling(1) 117 .padding_left(1) 118 .padding_right(1) 119 .padding_top(1) 120 .padding_bottom(1) 121 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2, xnn_init_f16_chw_params); 
122 } 123 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2,output_width_div_8)124 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2, output_width_div_8) { 125 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 126 for (size_t input_width = 16; input_width < 64; input_width += 8) { 127 DWConv2DMicrokernelTester() 128 .input_width(input_width) 129 .input_height(1) 130 .kernel_height(3) 131 .kernel_width(3) 132 .subsampling(1) 133 .padding_left(1) 134 .padding_right(1) 135 .padding_top(1) 136 .padding_bottom(1) 137 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2, xnn_init_f16_chw_params); 138 } 139 } 140 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2,output_width_lt_8)141 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2, output_width_lt_8) { 142 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 143 for (size_t input_width = 1; input_width < 8; input_width++) { 144 DWConv2DMicrokernelTester() 145 .input_width(8) 146 .input_height(1) 147 .kernel_height(3) 148 .kernel_width(3) 149 .subsampling(1) 150 .padding_left(1) 151 .padding_right(1) 152 .padding_top(1) 153 .padding_bottom(1) 154 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2, xnn_init_f16_chw_params); 155 } 156 } 157 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2,output_width_gt_8)158 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2, output_width_gt_8) { 159 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 160 for (size_t input_width = 9; input_width < 17; input_width++) { 161 DWConv2DMicrokernelTester() 162 .input_width(input_width) 163 .input_height(1) 164 .kernel_height(3) 165 .kernel_width(3) 166 .subsampling(1) 167 .padding_left(1) 168 .padding_right(1) 169 .padding_top(1) 170 .padding_bottom(1) 171 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2, xnn_init_f16_chw_params); 172 } 173 } 174 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2,output_height_gt_1)175 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC2, output_height_gt_1) { 176 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 177 for 
(size_t input_height = 2; input_height < 3; input_height++) { 178 for (size_t input_width = 1; input_width < 41; input_width += 7) { 179 DWConv2DMicrokernelTester() 180 .input_width(input_width) 181 .input_height(input_height) 182 .kernel_height(3) 183 .kernel_width(3) 184 .subsampling(1) 185 .padding_left(1) 186 .padding_right(1) 187 .padding_top(1) 188 .padding_bottom(1) 189 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2, xnn_init_f16_chw_params); 190 } 191 } 192 } 193 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 194 195 196 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3,output_width_eq_8)197 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3, output_width_eq_8) { 198 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 199 DWConv2DMicrokernelTester() 200 .input_width(8) 201 .input_height(1) 202 .kernel_height(3) 203 .kernel_width(3) 204 .subsampling(1) 205 .padding_left(1) 206 .padding_right(1) 207 .padding_top(1) 208 .padding_bottom(1) 209 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3, xnn_init_f16_chw_params); 210 } 211 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3,output_width_div_8)212 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3, output_width_div_8) { 213 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 214 for (size_t input_width = 16; input_width < 64; input_width += 8) { 215 DWConv2DMicrokernelTester() 216 .input_width(input_width) 217 .input_height(1) 218 .kernel_height(3) 219 .kernel_width(3) 220 .subsampling(1) 221 .padding_left(1) 222 .padding_right(1) 223 .padding_top(1) 224 .padding_bottom(1) 225 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3, xnn_init_f16_chw_params); 226 } 227 } 228 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3,output_width_lt_8)229 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3, output_width_lt_8) { 230 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 231 for (size_t input_width = 1; input_width < 8; input_width++) { 232 
DWConv2DMicrokernelTester() 233 .input_width(8) 234 .input_height(1) 235 .kernel_height(3) 236 .kernel_width(3) 237 .subsampling(1) 238 .padding_left(1) 239 .padding_right(1) 240 .padding_top(1) 241 .padding_bottom(1) 242 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3, xnn_init_f16_chw_params); 243 } 244 } 245 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3,output_width_gt_8)246 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3, output_width_gt_8) { 247 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 248 for (size_t input_width = 9; input_width < 17; input_width++) { 249 DWConv2DMicrokernelTester() 250 .input_width(input_width) 251 .input_height(1) 252 .kernel_height(3) 253 .kernel_width(3) 254 .subsampling(1) 255 .padding_left(1) 256 .padding_right(1) 257 .padding_top(1) 258 .padding_bottom(1) 259 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3, xnn_init_f16_chw_params); 260 } 261 } 262 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3,output_height_gt_1)263 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC3, output_height_gt_1) { 264 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 265 for (size_t input_height = 2; input_height < 3; input_height++) { 266 for (size_t input_width = 1; input_width < 41; input_width += 7) { 267 DWConv2DMicrokernelTester() 268 .input_width(input_width) 269 .input_height(input_height) 270 .kernel_height(3) 271 .kernel_width(3) 272 .subsampling(1) 273 .padding_left(1) 274 .padding_right(1) 275 .padding_top(1) 276 .padding_bottom(1) 277 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3, xnn_init_f16_chw_params); 278 } 279 } 280 } 281 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 282 283 284 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4,output_width_eq_8)285 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4, output_width_eq_8) { 286 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 287 DWConv2DMicrokernelTester() 288 .input_width(8) 289 .input_height(1) 290 
.kernel_height(3) 291 .kernel_width(3) 292 .subsampling(1) 293 .padding_left(1) 294 .padding_right(1) 295 .padding_top(1) 296 .padding_bottom(1) 297 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4, xnn_init_f16_chw_params); 298 } 299 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4,output_width_div_8)300 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4, output_width_div_8) { 301 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 302 for (size_t input_width = 16; input_width < 64; input_width += 8) { 303 DWConv2DMicrokernelTester() 304 .input_width(input_width) 305 .input_height(1) 306 .kernel_height(3) 307 .kernel_width(3) 308 .subsampling(1) 309 .padding_left(1) 310 .padding_right(1) 311 .padding_top(1) 312 .padding_bottom(1) 313 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4, xnn_init_f16_chw_params); 314 } 315 } 316 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4,output_width_lt_8)317 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4, output_width_lt_8) { 318 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 319 for (size_t input_width = 1; input_width < 8; input_width++) { 320 DWConv2DMicrokernelTester() 321 .input_width(8) 322 .input_height(1) 323 .kernel_height(3) 324 .kernel_width(3) 325 .subsampling(1) 326 .padding_left(1) 327 .padding_right(1) 328 .padding_top(1) 329 .padding_bottom(1) 330 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4, xnn_init_f16_chw_params); 331 } 332 } 333 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4,output_width_gt_8)334 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4, output_width_gt_8) { 335 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 336 for (size_t input_width = 9; input_width < 17; input_width++) { 337 DWConv2DMicrokernelTester() 338 .input_width(input_width) 339 .input_height(1) 340 .kernel_height(3) 341 .kernel_width(3) 342 .subsampling(1) 343 .padding_left(1) 344 .padding_right(1) 345 .padding_top(1) 346 .padding_bottom(1) 347 
.Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4, xnn_init_f16_chw_params); 348 } 349 } 350 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4,output_height_gt_1)351 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_1X8_ACC4, output_height_gt_1) { 352 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 353 for (size_t input_height = 2; input_height < 3; input_height++) { 354 for (size_t input_width = 1; input_width < 41; input_width += 7) { 355 DWConv2DMicrokernelTester() 356 .input_width(input_width) 357 .input_height(input_height) 358 .kernel_height(3) 359 .kernel_width(3) 360 .subsampling(1) 361 .padding_left(1) 362 .padding_right(1) 363 .padding_top(1) 364 .padding_bottom(1) 365 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4, xnn_init_f16_chw_params); 366 } 367 } 368 } 369 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 370 371 372 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8,output_width_eq_8)373 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8, output_width_eq_8) { 374 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 375 DWConv2DMicrokernelTester() 376 .input_width(8) 377 .input_height(2) 378 .kernel_height(3) 379 .kernel_width(3) 380 .subsampling(1) 381 .padding_left(1) 382 .padding_right(1) 383 .padding_top(1) 384 .padding_bottom(1) 385 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8, xnn_init_f16_chw_params); 386 } 387 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8,output_width_div_8)388 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8, output_width_div_8) { 389 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 390 for (size_t input_width = 16; input_width < 64; input_width += 8) { 391 DWConv2DMicrokernelTester() 392 .input_width(input_width) 393 .input_height(2) 394 .kernel_height(3) 395 .kernel_width(3) 396 .subsampling(1) 397 .padding_left(1) 398 .padding_right(1) 399 .padding_top(1) 400 .padding_bottom(1) 401 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8, xnn_init_f16_chw_params); 402 } 403 } 404 
TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8,output_width_lt_8)405 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8, output_width_lt_8) { 406 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 407 for (size_t input_width = 1; input_width < 8; input_width++) { 408 DWConv2DMicrokernelTester() 409 .input_width(8) 410 .input_height(2) 411 .kernel_height(3) 412 .kernel_width(3) 413 .subsampling(1) 414 .padding_left(1) 415 .padding_right(1) 416 .padding_top(1) 417 .padding_bottom(1) 418 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8, xnn_init_f16_chw_params); 419 } 420 } 421 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8,output_width_gt_8)422 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8, output_width_gt_8) { 423 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 424 for (size_t input_width = 9; input_width < 17; input_width++) { 425 DWConv2DMicrokernelTester() 426 .input_width(input_width) 427 .input_height(2) 428 .kernel_height(3) 429 .kernel_width(3) 430 .subsampling(1) 431 .padding_left(1) 432 .padding_right(1) 433 .padding_top(1) 434 .padding_bottom(1) 435 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8, xnn_init_f16_chw_params); 436 } 437 } 438 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8,output_height_div_2)439 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8, output_height_div_2) { 440 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 441 for (size_t input_height = 4; input_height < 16; input_height += 2) { 442 for (size_t input_width = 1; input_width < 41; input_width += 7) { 443 DWConv2DMicrokernelTester() 444 .input_width(input_width) 445 .input_height(input_height) 446 .kernel_height(3) 447 .kernel_width(3) 448 .subsampling(1) 449 .padding_left(1) 450 .padding_right(1) 451 .padding_top(1) 452 .padding_bottom(1) 453 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8, xnn_init_f16_chw_params); 454 } 455 } 456 } 457 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8,output_height_lt_2)458 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8, output_height_lt_2) { 459 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 460 for (size_t input_height = 1; input_height < 2; input_height++) { 461 for (size_t input_width = 1; input_width < 41; input_width += 7) { 462 DWConv2DMicrokernelTester() 463 .input_width(input_width) 464 .input_height(input_height) 465 .kernel_height(3) 466 .kernel_width(3) 467 .subsampling(1) 468 .padding_left(1) 469 .padding_right(1) 470 .padding_top(1) 471 .padding_bottom(1) 472 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8, xnn_init_f16_chw_params); 473 } 474 } 475 } 476 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8,output_height_gt_2)477 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8, output_height_gt_2) { 478 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 479 for (size_t input_height = 3; input_height < 5; input_height++) { 480 for (size_t input_width = 1; input_width < 41; input_width += 7) { 481 DWConv2DMicrokernelTester() 482 .input_width(input_width) 483 .input_height(input_height) 484 .kernel_height(3) 485 .kernel_width(3) 486 .subsampling(1) 487 .padding_left(1) 488 .padding_right(1) 489 .padding_top(1) 490 .padding_bottom(1) 491 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8, xnn_init_f16_chw_params); 492 } 493 } 494 } 495 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 496 497 498 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2,output_width_eq_8)499 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2, output_width_eq_8) { 500 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 501 DWConv2DMicrokernelTester() 502 .input_width(8) 503 .input_height(2) 504 .kernel_height(3) 505 .kernel_width(3) 506 .subsampling(1) 507 .padding_left(1) 508 .padding_right(1) 509 .padding_top(1) 510 .padding_bottom(1) 511 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2, xnn_init_f16_chw_params); 512 } 513 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2,output_width_div_8)514 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2, output_width_div_8) { 515 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 516 for (size_t input_width = 16; input_width < 64; input_width += 8) { 517 DWConv2DMicrokernelTester() 518 .input_width(input_width) 519 .input_height(2) 520 .kernel_height(3) 521 .kernel_width(3) 522 .subsampling(1) 523 .padding_left(1) 524 .padding_right(1) 525 .padding_top(1) 526 .padding_bottom(1) 527 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2, xnn_init_f16_chw_params); 528 } 529 } 530 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2,output_width_lt_8)531 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2, output_width_lt_8) { 532 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 533 for (size_t input_width = 1; input_width < 8; input_width++) { 534 DWConv2DMicrokernelTester() 535 .input_width(8) 536 .input_height(2) 537 .kernel_height(3) 538 .kernel_width(3) 539 .subsampling(1) 540 .padding_left(1) 541 .padding_right(1) 542 .padding_top(1) 543 .padding_bottom(1) 544 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2, xnn_init_f16_chw_params); 545 } 546 } 547 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2,output_width_gt_8)548 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2, output_width_gt_8) { 549 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 550 for (size_t input_width = 9; input_width < 17; input_width++) { 551 DWConv2DMicrokernelTester() 552 .input_width(input_width) 553 .input_height(2) 554 .kernel_height(3) 555 .kernel_width(3) 556 .subsampling(1) 557 .padding_left(1) 558 .padding_right(1) 559 .padding_top(1) 560 .padding_bottom(1) 561 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2, xnn_init_f16_chw_params); 562 } 563 } 564 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2,output_height_div_2)565 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2, output_height_div_2) { 566 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 567 for (size_t input_height = 4; input_height < 16; input_height += 2) { 568 for (size_t input_width = 1; input_width < 41; input_width += 7) { 569 
DWConv2DMicrokernelTester() 570 .input_width(input_width) 571 .input_height(input_height) 572 .kernel_height(3) 573 .kernel_width(3) 574 .subsampling(1) 575 .padding_left(1) 576 .padding_right(1) 577 .padding_top(1) 578 .padding_bottom(1) 579 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2, xnn_init_f16_chw_params); 580 } 581 } 582 } 583 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2,output_height_lt_2)584 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2, output_height_lt_2) { 585 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 586 for (size_t input_height = 1; input_height < 2; input_height++) { 587 for (size_t input_width = 1; input_width < 41; input_width += 7) { 588 DWConv2DMicrokernelTester() 589 .input_width(input_width) 590 .input_height(input_height) 591 .kernel_height(3) 592 .kernel_width(3) 593 .subsampling(1) 594 .padding_left(1) 595 .padding_right(1) 596 .padding_top(1) 597 .padding_bottom(1) 598 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2, xnn_init_f16_chw_params); 599 } 600 } 601 } 602 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2,output_height_gt_2)603 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_2X8_ACC2, output_height_gt_2) { 604 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 605 for (size_t input_height = 3; input_height < 5; input_height++) { 606 for (size_t input_width = 1; input_width < 41; input_width += 7) { 607 DWConv2DMicrokernelTester() 608 .input_width(input_width) 609 .input_height(input_height) 610 .kernel_height(3) 611 .kernel_width(3) 612 .subsampling(1) 613 .padding_left(1) 614 .padding_right(1) 615 .padding_top(1) 616 .padding_bottom(1) 617 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2, xnn_init_f16_chw_params); 618 } 619 } 620 } 621 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 622 623 624 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8,output_width_eq_8)625 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8, output_width_eq_8) { 626 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 627 DWConv2DMicrokernelTester() 628 .input_width(8) 629 .input_height(3) 630 .kernel_height(3) 631 .kernel_width(3) 632 .subsampling(1) 633 .padding_left(1) 634 .padding_right(1) 635 .padding_top(1) 636 .padding_bottom(1) 637 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8, xnn_init_f16_chw_params); 638 } 639 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8,output_width_div_8)640 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8, output_width_div_8) { 641 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 642 for (size_t input_width = 16; input_width < 64; input_width += 8) { 643 DWConv2DMicrokernelTester() 644 .input_width(input_width) 645 .input_height(3) 646 .kernel_height(3) 647 .kernel_width(3) 648 .subsampling(1) 649 .padding_left(1) 650 .padding_right(1) 651 .padding_top(1) 652 .padding_bottom(1) 653 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8, xnn_init_f16_chw_params); 654 } 655 } 656 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8,output_width_lt_8)657 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8, output_width_lt_8) { 658 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 659 for (size_t input_width = 1; input_width < 8; input_width++) { 660 DWConv2DMicrokernelTester() 661 .input_width(8) 662 .input_height(3) 663 .kernel_height(3) 664 .kernel_width(3) 665 .subsampling(1) 666 .padding_left(1) 667 .padding_right(1) 668 .padding_top(1) 669 .padding_bottom(1) 670 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8, xnn_init_f16_chw_params); 671 } 672 } 673 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8,output_width_gt_8)674 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8, output_width_gt_8) { 675 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 676 for (size_t input_width = 9; input_width < 17; input_width++) { 677 DWConv2DMicrokernelTester() 678 .input_width(input_width) 679 .input_height(3) 680 .kernel_height(3) 681 .kernel_width(3) 682 .subsampling(1) 683 .padding_left(1) 684 .padding_right(1) 685 .padding_top(1) 686 .padding_bottom(1) 
687 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8, xnn_init_f16_chw_params); 688 } 689 } 690 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8,output_height_div_3)691 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8, output_height_div_3) { 692 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 693 for (size_t input_height = 6; input_height < 24; input_height += 3) { 694 for (size_t input_width = 1; input_width < 41; input_width += 7) { 695 DWConv2DMicrokernelTester() 696 .input_width(input_width) 697 .input_height(input_height) 698 .kernel_height(3) 699 .kernel_width(3) 700 .subsampling(1) 701 .padding_left(1) 702 .padding_right(1) 703 .padding_top(1) 704 .padding_bottom(1) 705 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8, xnn_init_f16_chw_params); 706 } 707 } 708 } 709 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8,output_height_lt_3)710 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8, output_height_lt_3) { 711 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 712 for (size_t input_height = 1; input_height < 3; input_height++) { 713 for (size_t input_width = 1; input_width < 41; input_width += 7) { 714 DWConv2DMicrokernelTester() 715 .input_width(input_width) 716 .input_height(input_height) 717 .kernel_height(3) 718 .kernel_width(3) 719 .subsampling(1) 720 .padding_left(1) 721 .padding_right(1) 722 .padding_top(1) 723 .padding_bottom(1) 724 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8, xnn_init_f16_chw_params); 725 } 726 } 727 } 728 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8,output_height_gt_3)729 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_3X8, output_height_gt_3) { 730 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 731 for (size_t input_height = 4; input_height < 7; input_height++) { 732 for (size_t input_width = 1; input_width < 41; input_width += 7) { 733 DWConv2DMicrokernelTester() 734 .input_width(input_width) 735 .input_height(input_height) 736 .kernel_height(3) 737 .kernel_width(3) 738 .subsampling(1) 739 .padding_left(1) 740 .padding_right(1) 741 
.padding_top(1) 742 .padding_bottom(1) 743 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8, xnn_init_f16_chw_params); 744 } 745 } 746 } 747 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 748 749 750 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8,output_width_eq_8)751 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8, output_width_eq_8) { 752 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 753 DWConv2DMicrokernelTester() 754 .input_width(8) 755 .input_height(4) 756 .kernel_height(3) 757 .kernel_width(3) 758 .subsampling(1) 759 .padding_left(1) 760 .padding_right(1) 761 .padding_top(1) 762 .padding_bottom(1) 763 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8, xnn_init_f16_chw_params); 764 } 765 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8,output_width_div_8)766 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8, output_width_div_8) { 767 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 768 for (size_t input_width = 16; input_width < 64; input_width += 8) { 769 DWConv2DMicrokernelTester() 770 .input_width(input_width) 771 .input_height(4) 772 .kernel_height(3) 773 .kernel_width(3) 774 .subsampling(1) 775 .padding_left(1) 776 .padding_right(1) 777 .padding_top(1) 778 .padding_bottom(1) 779 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8, xnn_init_f16_chw_params); 780 } 781 } 782 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8,output_width_lt_8)783 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8, output_width_lt_8) { 784 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 785 for (size_t input_width = 1; input_width < 8; input_width++) { 786 DWConv2DMicrokernelTester() 787 .input_width(8) 788 .input_height(4) 789 .kernel_height(3) 790 .kernel_width(3) 791 .subsampling(1) 792 .padding_left(1) 793 .padding_right(1) 794 .padding_top(1) 795 .padding_bottom(1) 796 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8, xnn_init_f16_chw_params); 797 } 798 } 799 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8,output_width_gt_8)800 
TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8, output_width_gt_8) { 801 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 802 for (size_t input_width = 9; input_width < 17; input_width++) { 803 DWConv2DMicrokernelTester() 804 .input_width(input_width) 805 .input_height(4) 806 .kernel_height(3) 807 .kernel_width(3) 808 .subsampling(1) 809 .padding_left(1) 810 .padding_right(1) 811 .padding_top(1) 812 .padding_bottom(1) 813 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8, xnn_init_f16_chw_params); 814 } 815 } 816 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8,output_height_div_4)817 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8, output_height_div_4) { 818 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 819 for (size_t input_height = 8; input_height < 32; input_height += 4) { 820 for (size_t input_width = 1; input_width < 41; input_width += 7) { 821 DWConv2DMicrokernelTester() 822 .input_width(input_width) 823 .input_height(input_height) 824 .kernel_height(3) 825 .kernel_width(3) 826 .subsampling(1) 827 .padding_left(1) 828 .padding_right(1) 829 .padding_top(1) 830 .padding_bottom(1) 831 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8, xnn_init_f16_chw_params); 832 } 833 } 834 } 835 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8,output_height_lt_4)836 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8, output_height_lt_4) { 837 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 838 for (size_t input_height = 1; input_height < 4; input_height++) { 839 for (size_t input_width = 1; input_width < 41; input_width += 7) { 840 DWConv2DMicrokernelTester() 841 .input_width(input_width) 842 .input_height(input_height) 843 .kernel_height(3) 844 .kernel_width(3) 845 .subsampling(1) 846 .padding_left(1) 847 .padding_right(1) 848 .padding_top(1) 849 .padding_bottom(1) 850 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8, xnn_init_f16_chw_params); 851 } 852 } 853 } 854 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8,output_height_gt_4)855 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_4X8, 
output_height_gt_4) { 856 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 857 for (size_t input_height = 5; input_height < 9; input_height++) { 858 for (size_t input_width = 1; input_width < 41; input_width += 7) { 859 DWConv2DMicrokernelTester() 860 .input_width(input_width) 861 .input_height(input_height) 862 .kernel_height(3) 863 .kernel_width(3) 864 .subsampling(1) 865 .padding_left(1) 866 .padding_right(1) 867 .padding_top(1) 868 .padding_bottom(1) 869 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8, xnn_init_f16_chw_params); 870 } 871 } 872 } 873 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 874 875 876 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8,output_width_eq_8)877 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8, output_width_eq_8) { 878 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 879 DWConv2DMicrokernelTester() 880 .input_width(8) 881 .input_height(5) 882 .kernel_height(3) 883 .kernel_width(3) 884 .subsampling(1) 885 .padding_left(1) 886 .padding_right(1) 887 .padding_top(1) 888 .padding_bottom(1) 889 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8, xnn_init_f16_chw_params); 890 } 891 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8,output_width_div_8)892 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8, output_width_div_8) { 893 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 894 for (size_t input_width = 16; input_width < 64; input_width += 8) { 895 DWConv2DMicrokernelTester() 896 .input_width(input_width) 897 .input_height(5) 898 .kernel_height(3) 899 .kernel_width(3) 900 .subsampling(1) 901 .padding_left(1) 902 .padding_right(1) 903 .padding_top(1) 904 .padding_bottom(1) 905 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8, xnn_init_f16_chw_params); 906 } 907 } 908 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8,output_width_lt_8)909 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8, output_width_lt_8) { 910 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 911 for (size_t input_width = 1; input_width < 8; input_width++) { 912 
DWConv2DMicrokernelTester() 913 .input_width(8) 914 .input_height(5) 915 .kernel_height(3) 916 .kernel_width(3) 917 .subsampling(1) 918 .padding_left(1) 919 .padding_right(1) 920 .padding_top(1) 921 .padding_bottom(1) 922 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8, xnn_init_f16_chw_params); 923 } 924 } 925 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8,output_width_gt_8)926 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8, output_width_gt_8) { 927 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 928 for (size_t input_width = 9; input_width < 17; input_width++) { 929 DWConv2DMicrokernelTester() 930 .input_width(input_width) 931 .input_height(5) 932 .kernel_height(3) 933 .kernel_width(3) 934 .subsampling(1) 935 .padding_left(1) 936 .padding_right(1) 937 .padding_top(1) 938 .padding_bottom(1) 939 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8, xnn_init_f16_chw_params); 940 } 941 } 942 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8,output_height_div_5)943 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8, output_height_div_5) { 944 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 945 for (size_t input_height = 10; input_height < 40; input_height += 5) { 946 for (size_t input_width = 1; input_width < 41; input_width += 7) { 947 DWConv2DMicrokernelTester() 948 .input_width(input_width) 949 .input_height(input_height) 950 .kernel_height(3) 951 .kernel_width(3) 952 .subsampling(1) 953 .padding_left(1) 954 .padding_right(1) 955 .padding_top(1) 956 .padding_bottom(1) 957 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8, xnn_init_f16_chw_params); 958 } 959 } 960 } 961 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8,output_height_lt_5)962 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8, output_height_lt_5) { 963 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 964 for (size_t input_height = 1; input_height < 5; input_height++) { 965 for (size_t input_width = 1; input_width < 41; input_width += 7) { 966 DWConv2DMicrokernelTester() 967 .input_width(input_width) 968 
.input_height(input_height) 969 .kernel_height(3) 970 .kernel_width(3) 971 .subsampling(1) 972 .padding_left(1) 973 .padding_right(1) 974 .padding_top(1) 975 .padding_bottom(1) 976 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8, xnn_init_f16_chw_params); 977 } 978 } 979 } 980 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8,output_height_gt_5)981 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_5X8, output_height_gt_5) { 982 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 983 for (size_t input_height = 6; input_height < 11; input_height++) { 984 for (size_t input_width = 1; input_width < 41; input_width += 7) { 985 DWConv2DMicrokernelTester() 986 .input_width(input_width) 987 .input_height(input_height) 988 .kernel_height(3) 989 .kernel_width(3) 990 .subsampling(1) 991 .padding_left(1) 992 .padding_right(1) 993 .padding_top(1) 994 .padding_bottom(1) 995 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8, xnn_init_f16_chw_params); 996 } 997 } 998 } 999 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1000 1001 1002 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8,output_width_eq_8)1003 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8, output_width_eq_8) { 1004 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1005 DWConv2DMicrokernelTester() 1006 .input_width(8) 1007 .input_height(6) 1008 .kernel_height(3) 1009 .kernel_width(3) 1010 .subsampling(1) 1011 .padding_left(1) 1012 .padding_right(1) 1013 .padding_top(1) 1014 .padding_bottom(1) 1015 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8, xnn_init_f16_chw_params); 1016 } 1017 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8,output_width_div_8)1018 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8, output_width_div_8) { 1019 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1020 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1021 DWConv2DMicrokernelTester() 1022 .input_width(input_width) 1023 .input_height(6) 1024 .kernel_height(3) 1025 .kernel_width(3) 1026 .subsampling(1) 1027 
.padding_left(1) 1028 .padding_right(1) 1029 .padding_top(1) 1030 .padding_bottom(1) 1031 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8, xnn_init_f16_chw_params); 1032 } 1033 } 1034 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8,output_width_lt_8)1035 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8, output_width_lt_8) { 1036 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1037 for (size_t input_width = 1; input_width < 8; input_width++) { 1038 DWConv2DMicrokernelTester() 1039 .input_width(8) 1040 .input_height(6) 1041 .kernel_height(3) 1042 .kernel_width(3) 1043 .subsampling(1) 1044 .padding_left(1) 1045 .padding_right(1) 1046 .padding_top(1) 1047 .padding_bottom(1) 1048 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8, xnn_init_f16_chw_params); 1049 } 1050 } 1051 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8,output_width_gt_8)1052 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8, output_width_gt_8) { 1053 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1054 for (size_t input_width = 9; input_width < 17; input_width++) { 1055 DWConv2DMicrokernelTester() 1056 .input_width(input_width) 1057 .input_height(6) 1058 .kernel_height(3) 1059 .kernel_width(3) 1060 .subsampling(1) 1061 .padding_left(1) 1062 .padding_right(1) 1063 .padding_top(1) 1064 .padding_bottom(1) 1065 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8, xnn_init_f16_chw_params); 1066 } 1067 } 1068 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8,output_height_div_6)1069 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8, output_height_div_6) { 1070 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1071 for (size_t input_height = 12; input_height < 48; input_height += 6) { 1072 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1073 DWConv2DMicrokernelTester() 1074 .input_width(input_width) 1075 .input_height(input_height) 1076 .kernel_height(3) 1077 .kernel_width(3) 1078 .subsampling(1) 1079 .padding_left(1) 1080 .padding_right(1) 1081 .padding_top(1) 1082 .padding_bottom(1) 1083 
.Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8, xnn_init_f16_chw_params); 1084 } 1085 } 1086 } 1087 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8,output_height_lt_6)1088 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8, output_height_lt_6) { 1089 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1090 for (size_t input_height = 1; input_height < 6; input_height++) { 1091 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1092 DWConv2DMicrokernelTester() 1093 .input_width(input_width) 1094 .input_height(input_height) 1095 .kernel_height(3) 1096 .kernel_width(3) 1097 .subsampling(1) 1098 .padding_left(1) 1099 .padding_right(1) 1100 .padding_top(1) 1101 .padding_bottom(1) 1102 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8, xnn_init_f16_chw_params); 1103 } 1104 } 1105 } 1106 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8,output_height_gt_6)1107 TEST(F16_DWCONV2D_CHW_3X3P1__NEONFP16ARITH_6X8, output_height_gt_6) { 1108 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1109 for (size_t input_height = 7; input_height < 13; input_height++) { 1110 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1111 DWConv2DMicrokernelTester() 1112 .input_width(input_width) 1113 .input_height(input_height) 1114 .kernel_height(3) 1115 .kernel_width(3) 1116 .subsampling(1) 1117 .padding_left(1) 1118 .padding_right(1) 1119 .padding_top(1) 1120 .padding_bottom(1) 1121 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8, xnn_init_f16_chw_params); 1122 } 1123 } 1124 } 1125 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1126 1127 1128 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4,output_width_eq_4)1129 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4, output_width_eq_4) { 1130 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1131 for (size_t input_width = 7; input_width < 9; input_width++) { 1132 DWConv2DMicrokernelTester() 1133 .input_width(input_width) 1134 .input_height(2) 1135 .kernel_height(3) 1136 .kernel_width(3) 1137 
.subsampling(2) 1138 .padding_left(1) 1139 .padding_right(1) 1140 .padding_top(1) 1141 .padding_bottom(1) 1142 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4, xnn_init_f16_chw_params); 1143 } 1144 } 1145 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4,output_width_div_4)1146 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4, output_width_div_4) { 1147 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1148 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1149 DWConv2DMicrokernelTester() 1150 .input_width(input_width) 1151 .input_height(2) 1152 .kernel_height(3) 1153 .kernel_width(3) 1154 .subsampling(2) 1155 .padding_left(1) 1156 .padding_right(1) 1157 .padding_top(1) 1158 .padding_bottom(1) 1159 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4, xnn_init_f16_chw_params); 1160 } 1161 } 1162 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4,output_width_lt_4)1163 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4, output_width_lt_4) { 1164 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1165 for (size_t input_width = 1; input_width < 7; input_width++) { 1166 DWConv2DMicrokernelTester() 1167 .input_width(8) 1168 .input_height(2) 1169 .kernel_height(3) 1170 .kernel_width(3) 1171 .subsampling(2) 1172 .padding_left(1) 1173 .padding_right(1) 1174 .padding_top(1) 1175 .padding_bottom(1) 1176 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4, xnn_init_f16_chw_params); 1177 } 1178 } 1179 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4,output_width_gt_4)1180 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4, output_width_gt_4) { 1181 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1182 for (size_t input_width = 9; input_width < 17; input_width++) { 1183 DWConv2DMicrokernelTester() 1184 .input_width(input_width) 1185 .input_height(2) 1186 .kernel_height(3) 1187 .kernel_width(3) 1188 .subsampling(2) 1189 .padding_left(1) 1190 .padding_right(1) 1191 .padding_top(1) 1192 .padding_bottom(1) 1193 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4, 
xnn_init_f16_chw_params); 1194 } 1195 } 1196 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4,output_height_eq_1)1197 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4, output_height_eq_1) { 1198 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1199 for (size_t input_height = 1; input_height < 3; input_height++) { 1200 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1201 DWConv2DMicrokernelTester() 1202 .input_width(input_width) 1203 .input_height(input_height) 1204 .kernel_height(3) 1205 .kernel_width(3) 1206 .subsampling(2) 1207 .padding_left(1) 1208 .padding_right(1) 1209 .padding_top(1) 1210 .padding_bottom(1) 1211 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4, xnn_init_f16_chw_params); 1212 } 1213 } 1214 } 1215 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4,output_height_gt_1)1216 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4, output_height_gt_1) { 1217 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1218 for (size_t input_height = 3; input_height < 5; input_height++) { 1219 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1220 DWConv2DMicrokernelTester() 1221 .input_width(input_width) 1222 .input_height(input_height) 1223 .kernel_height(3) 1224 .kernel_width(3) 1225 .subsampling(2) 1226 .padding_left(1) 1227 .padding_right(1) 1228 .padding_top(1) 1229 .padding_bottom(1) 1230 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4, xnn_init_f16_chw_params); 1231 } 1232 } 1233 } 1234 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4,padding_top_eq_1)1235 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4, padding_top_eq_1) { 1236 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1237 for (size_t input_height = 2; input_height < 8; input_height++) { 1238 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1239 DWConv2DMicrokernelTester() 1240 .input_width(input_width) 1241 .input_height(input_height) 1242 .kernel_height(3) 1243 .kernel_width(3) 1244 .subsampling(2) 1245 .padding_left(1) 1246 .padding_right(1) 1247 .padding_top(0) 
1248 .padding_bottom(1) 1249 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4, xnn_init_f16_chw_params); 1250 } 1251 } 1252 } 1253 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1254 1255 1256 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2,output_width_eq_4)1257 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2, output_width_eq_4) { 1258 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1259 for (size_t input_width = 7; input_width < 9; input_width++) { 1260 DWConv2DMicrokernelTester() 1261 .input_width(input_width) 1262 .input_height(2) 1263 .kernel_height(3) 1264 .kernel_width(3) 1265 .subsampling(2) 1266 .padding_left(1) 1267 .padding_right(1) 1268 .padding_top(1) 1269 .padding_bottom(1) 1270 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 1271 } 1272 } 1273 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2,output_width_div_4)1274 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2, output_width_div_4) { 1275 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1276 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1277 DWConv2DMicrokernelTester() 1278 .input_width(input_width) 1279 .input_height(2) 1280 .kernel_height(3) 1281 .kernel_width(3) 1282 .subsampling(2) 1283 .padding_left(1) 1284 .padding_right(1) 1285 .padding_top(1) 1286 .padding_bottom(1) 1287 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 1288 } 1289 } 1290 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2,output_width_lt_4)1291 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2, output_width_lt_4) { 1292 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1293 for (size_t input_width = 1; input_width < 7; input_width++) { 1294 DWConv2DMicrokernelTester() 1295 .input_width(8) 1296 .input_height(2) 1297 .kernel_height(3) 1298 .kernel_width(3) 1299 .subsampling(2) 1300 .padding_left(1) 1301 .padding_right(1) 1302 .padding_top(1) 1303 .padding_bottom(1) 1304 
.Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 1305 } 1306 } 1307 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2,output_width_gt_4)1308 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2, output_width_gt_4) { 1309 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1310 for (size_t input_width = 9; input_width < 17; input_width++) { 1311 DWConv2DMicrokernelTester() 1312 .input_width(input_width) 1313 .input_height(2) 1314 .kernel_height(3) 1315 .kernel_width(3) 1316 .subsampling(2) 1317 .padding_left(1) 1318 .padding_right(1) 1319 .padding_top(1) 1320 .padding_bottom(1) 1321 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 1322 } 1323 } 1324 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2,output_height_eq_1)1325 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2, output_height_eq_1) { 1326 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1327 for (size_t input_height = 1; input_height < 3; input_height++) { 1328 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1329 DWConv2DMicrokernelTester() 1330 .input_width(input_width) 1331 .input_height(input_height) 1332 .kernel_height(3) 1333 .kernel_width(3) 1334 .subsampling(2) 1335 .padding_left(1) 1336 .padding_right(1) 1337 .padding_top(1) 1338 .padding_bottom(1) 1339 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 1340 } 1341 } 1342 } 1343 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2,output_height_gt_1)1344 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2, output_height_gt_1) { 1345 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1346 for (size_t input_height = 3; input_height < 5; input_height++) { 1347 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1348 DWConv2DMicrokernelTester() 1349 .input_width(input_width) 1350 .input_height(input_height) 1351 .kernel_height(3) 1352 .kernel_width(3) 1353 .subsampling(2) 1354 .padding_left(1) 1355 .padding_right(1) 
1356 .padding_top(1) 1357 .padding_bottom(1) 1358 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 1359 } 1360 } 1361 } 1362 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2,padding_top_eq_1)1363 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC2, padding_top_eq_1) { 1364 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1365 for (size_t input_height = 2; input_height < 8; input_height++) { 1366 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1367 DWConv2DMicrokernelTester() 1368 .input_width(input_width) 1369 .input_height(input_height) 1370 .kernel_height(3) 1371 .kernel_width(3) 1372 .subsampling(2) 1373 .padding_left(1) 1374 .padding_right(1) 1375 .padding_top(0) 1376 .padding_bottom(1) 1377 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 1378 } 1379 } 1380 } 1381 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1382 1383 1384 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3,output_width_eq_4)1385 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3, output_width_eq_4) { 1386 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1387 for (size_t input_width = 7; input_width < 9; input_width++) { 1388 DWConv2DMicrokernelTester() 1389 .input_width(input_width) 1390 .input_height(2) 1391 .kernel_height(3) 1392 .kernel_width(3) 1393 .subsampling(2) 1394 .padding_left(1) 1395 .padding_right(1) 1396 .padding_top(1) 1397 .padding_bottom(1) 1398 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 1399 } 1400 } 1401 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3,output_width_div_4)1402 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3, output_width_div_4) { 1403 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1404 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1405 DWConv2DMicrokernelTester() 1406 .input_width(input_width) 1407 .input_height(2) 1408 .kernel_height(3) 1409 
.kernel_width(3) 1410 .subsampling(2) 1411 .padding_left(1) 1412 .padding_right(1) 1413 .padding_top(1) 1414 .padding_bottom(1) 1415 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 1416 } 1417 } 1418 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3,output_width_lt_4)1419 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3, output_width_lt_4) { 1420 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1421 for (size_t input_width = 1; input_width < 7; input_width++) { 1422 DWConv2DMicrokernelTester() 1423 .input_width(8) 1424 .input_height(2) 1425 .kernel_height(3) 1426 .kernel_width(3) 1427 .subsampling(2) 1428 .padding_left(1) 1429 .padding_right(1) 1430 .padding_top(1) 1431 .padding_bottom(1) 1432 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 1433 } 1434 } 1435 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3,output_width_gt_4)1436 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3, output_width_gt_4) { 1437 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1438 for (size_t input_width = 9; input_width < 17; input_width++) { 1439 DWConv2DMicrokernelTester() 1440 .input_width(input_width) 1441 .input_height(2) 1442 .kernel_height(3) 1443 .kernel_width(3) 1444 .subsampling(2) 1445 .padding_left(1) 1446 .padding_right(1) 1447 .padding_top(1) 1448 .padding_bottom(1) 1449 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 1450 } 1451 } 1452 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3,output_height_eq_1)1453 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3, output_height_eq_1) { 1454 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1455 for (size_t input_height = 1; input_height < 3; input_height++) { 1456 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1457 DWConv2DMicrokernelTester() 1458 .input_width(input_width) 1459 .input_height(input_height) 1460 .kernel_height(3) 1461 .kernel_width(3) 1462 .subsampling(2) 1463 
.padding_left(1) 1464 .padding_right(1) 1465 .padding_top(1) 1466 .padding_bottom(1) 1467 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 1468 } 1469 } 1470 } 1471 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3,output_height_gt_1)1472 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3, output_height_gt_1) { 1473 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1474 for (size_t input_height = 3; input_height < 5; input_height++) { 1475 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1476 DWConv2DMicrokernelTester() 1477 .input_width(input_width) 1478 .input_height(input_height) 1479 .kernel_height(3) 1480 .kernel_width(3) 1481 .subsampling(2) 1482 .padding_left(1) 1483 .padding_right(1) 1484 .padding_top(1) 1485 .padding_bottom(1) 1486 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 1487 } 1488 } 1489 } 1490 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3,padding_top_eq_1)1491 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC3, padding_top_eq_1) { 1492 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1493 for (size_t input_height = 2; input_height < 8; input_height++) { 1494 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1495 DWConv2DMicrokernelTester() 1496 .input_width(input_width) 1497 .input_height(input_height) 1498 .kernel_height(3) 1499 .kernel_width(3) 1500 .subsampling(2) 1501 .padding_left(1) 1502 .padding_right(1) 1503 .padding_top(0) 1504 .padding_bottom(1) 1505 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 1506 } 1507 } 1508 } 1509 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1510 1511 1512 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4,output_width_eq_4)1513 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4, output_width_eq_4) { 1514 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1515 for (size_t input_width = 7; input_width < 9; 
input_width++) { 1516 DWConv2DMicrokernelTester() 1517 .input_width(input_width) 1518 .input_height(2) 1519 .kernel_height(3) 1520 .kernel_width(3) 1521 .subsampling(2) 1522 .padding_left(1) 1523 .padding_right(1) 1524 .padding_top(1) 1525 .padding_bottom(1) 1526 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 1527 } 1528 } 1529 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4,output_width_div_4)1530 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4, output_width_div_4) { 1531 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1532 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1533 DWConv2DMicrokernelTester() 1534 .input_width(input_width) 1535 .input_height(2) 1536 .kernel_height(3) 1537 .kernel_width(3) 1538 .subsampling(2) 1539 .padding_left(1) 1540 .padding_right(1) 1541 .padding_top(1) 1542 .padding_bottom(1) 1543 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 1544 } 1545 } 1546 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4,output_width_lt_4)1547 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4, output_width_lt_4) { 1548 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1549 for (size_t input_width = 1; input_width < 7; input_width++) { 1550 DWConv2DMicrokernelTester() 1551 .input_width(8) 1552 .input_height(2) 1553 .kernel_height(3) 1554 .kernel_width(3) 1555 .subsampling(2) 1556 .padding_left(1) 1557 .padding_right(1) 1558 .padding_top(1) 1559 .padding_bottom(1) 1560 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 1561 } 1562 } 1563 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4,output_width_gt_4)1564 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4, output_width_gt_4) { 1565 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1566 for (size_t input_width = 9; input_width < 17; input_width++) { 1567 DWConv2DMicrokernelTester() 1568 .input_width(input_width) 1569 .input_height(2) 1570 .kernel_height(3) 1571 
.kernel_width(3) 1572 .subsampling(2) 1573 .padding_left(1) 1574 .padding_right(1) 1575 .padding_top(1) 1576 .padding_bottom(1) 1577 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 1578 } 1579 } 1580 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4,output_height_eq_1)1581 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4, output_height_eq_1) { 1582 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1583 for (size_t input_height = 1; input_height < 3; input_height++) { 1584 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1585 DWConv2DMicrokernelTester() 1586 .input_width(input_width) 1587 .input_height(input_height) 1588 .kernel_height(3) 1589 .kernel_width(3) 1590 .subsampling(2) 1591 .padding_left(1) 1592 .padding_right(1) 1593 .padding_top(1) 1594 .padding_bottom(1) 1595 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 1596 } 1597 } 1598 } 1599 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4,output_height_gt_1)1600 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4, output_height_gt_1) { 1601 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1602 for (size_t input_height = 3; input_height < 5; input_height++) { 1603 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1604 DWConv2DMicrokernelTester() 1605 .input_width(input_width) 1606 .input_height(input_height) 1607 .kernel_height(3) 1608 .kernel_width(3) 1609 .subsampling(2) 1610 .padding_left(1) 1611 .padding_right(1) 1612 .padding_top(1) 1613 .padding_bottom(1) 1614 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 1615 } 1616 } 1617 } 1618 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4,padding_top_eq_1)1619 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_1X4_ACC4, padding_top_eq_1) { 1620 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1621 for (size_t input_height = 2; input_height < 8; input_height++) { 1622 for (size_t input_width = 1; input_width < 41; 
input_width += 7) { 1623 DWConv2DMicrokernelTester() 1624 .input_width(input_width) 1625 .input_height(input_height) 1626 .kernel_height(3) 1627 .kernel_width(3) 1628 .subsampling(2) 1629 .padding_left(1) 1630 .padding_right(1) 1631 .padding_top(0) 1632 .padding_bottom(1) 1633 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 1634 } 1635 } 1636 } 1637 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1638 1639 1640 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_width_eq_4)1641 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_width_eq_4) { 1642 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1643 for (size_t input_width = 7; input_width < 9; input_width++) { 1644 DWConv2DMicrokernelTester() 1645 .input_width(input_width) 1646 .input_height(4) 1647 .kernel_height(3) 1648 .kernel_width(3) 1649 .subsampling(2) 1650 .padding_left(1) 1651 .padding_right(1) 1652 .padding_top(1) 1653 .padding_bottom(1) 1654 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1655 } 1656 } 1657 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_width_div_4)1658 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_width_div_4) { 1659 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1660 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1661 DWConv2DMicrokernelTester() 1662 .input_width(input_width) 1663 .input_height(4) 1664 .kernel_height(3) 1665 .kernel_width(3) 1666 .subsampling(2) 1667 .padding_left(1) 1668 .padding_right(1) 1669 .padding_top(1) 1670 .padding_bottom(1) 1671 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1672 } 1673 } 1674 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_width_lt_4)1675 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_width_lt_4) { 1676 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1677 for (size_t input_width = 1; input_width < 7; input_width++) { 1678 
DWConv2DMicrokernelTester() 1679 .input_width(8) 1680 .input_height(4) 1681 .kernel_height(3) 1682 .kernel_width(3) 1683 .subsampling(2) 1684 .padding_left(1) 1685 .padding_right(1) 1686 .padding_top(1) 1687 .padding_bottom(1) 1688 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1689 } 1690 } 1691 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_width_gt_4)1692 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_width_gt_4) { 1693 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1694 for (size_t input_width = 9; input_width < 17; input_width++) { 1695 DWConv2DMicrokernelTester() 1696 .input_width(input_width) 1697 .input_height(4) 1698 .kernel_height(3) 1699 .kernel_width(3) 1700 .subsampling(2) 1701 .padding_left(1) 1702 .padding_right(1) 1703 .padding_top(1) 1704 .padding_bottom(1) 1705 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1706 } 1707 } 1708 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_height_eq_2)1709 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_height_eq_2) { 1710 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1711 for (size_t input_height = 3; input_height < 5; input_height++) { 1712 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1713 DWConv2DMicrokernelTester() 1714 .input_width(input_width) 1715 .input_height(input_height) 1716 .kernel_height(3) 1717 .kernel_width(3) 1718 .subsampling(2) 1719 .padding_left(1) 1720 .padding_right(1) 1721 .padding_top(1) 1722 .padding_bottom(1) 1723 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1724 } 1725 } 1726 } 1727 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_height_div_2)1728 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_height_div_2) { 1729 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1730 for (size_t input_height = 8; input_height < 32; input_height += 4) { 1731 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1732 
DWConv2DMicrokernelTester() 1733 .input_width(input_width) 1734 .input_height(input_height) 1735 .kernel_height(3) 1736 .kernel_width(3) 1737 .subsampling(2) 1738 .padding_left(1) 1739 .padding_right(1) 1740 .padding_top(1) 1741 .padding_bottom(1) 1742 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1743 } 1744 } 1745 } 1746 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_height_lt_2)1747 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_height_lt_2) { 1748 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1749 for (size_t input_height = 1; input_height < 3; input_height++) { 1750 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1751 DWConv2DMicrokernelTester() 1752 .input_width(input_width) 1753 .input_height(input_height) 1754 .kernel_height(3) 1755 .kernel_width(3) 1756 .subsampling(2) 1757 .padding_left(1) 1758 .padding_right(1) 1759 .padding_top(1) 1760 .padding_bottom(1) 1761 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1762 } 1763 } 1764 } 1765 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,output_height_gt_2)1766 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, output_height_gt_2) { 1767 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1768 for (size_t input_height = 5; input_height < 9; input_height++) { 1769 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1770 DWConv2DMicrokernelTester() 1771 .input_width(input_width) 1772 .input_height(input_height) 1773 .kernel_height(3) 1774 .kernel_width(3) 1775 .subsampling(2) 1776 .padding_left(1) 1777 .padding_right(1) 1778 .padding_top(1) 1779 .padding_bottom(1) 1780 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1781 } 1782 } 1783 } 1784 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4,padding_top_eq_1)1785 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4, padding_top_eq_1) { 1786 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1787 for (size_t input_height = 2; input_height 
< 14; input_height++) { 1788 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1789 DWConv2DMicrokernelTester() 1790 .input_width(input_width) 1791 .input_height(input_height) 1792 .kernel_height(3) 1793 .kernel_width(3) 1794 .subsampling(2) 1795 .padding_left(1) 1796 .padding_right(1) 1797 .padding_top(0) 1798 .padding_bottom(1) 1799 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4, xnn_init_f16_chw_params); 1800 } 1801 } 1802 } 1803 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1804 1805 1806 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_width_eq_4)1807 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_width_eq_4) { 1808 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1809 for (size_t input_width = 7; input_width < 9; input_width++) { 1810 DWConv2DMicrokernelTester() 1811 .input_width(input_width) 1812 .input_height(4) 1813 .kernel_height(3) 1814 .kernel_width(3) 1815 .subsampling(2) 1816 .padding_left(1) 1817 .padding_right(1) 1818 .padding_top(1) 1819 .padding_bottom(1) 1820 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1821 } 1822 } 1823 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_width_div_4)1824 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_width_div_4) { 1825 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1826 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1827 DWConv2DMicrokernelTester() 1828 .input_width(input_width) 1829 .input_height(4) 1830 .kernel_height(3) 1831 .kernel_width(3) 1832 .subsampling(2) 1833 .padding_left(1) 1834 .padding_right(1) 1835 .padding_top(1) 1836 .padding_bottom(1) 1837 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1838 } 1839 } 1840 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_width_lt_4)1841 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_width_lt_4) { 1842 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1843 for (size_t input_width = 1; input_width < 7; input_width++) { 1844 DWConv2DMicrokernelTester() 1845 .input_width(8) 1846 .input_height(4) 1847 .kernel_height(3) 1848 .kernel_width(3) 1849 .subsampling(2) 1850 .padding_left(1) 1851 .padding_right(1) 1852 .padding_top(1) 1853 .padding_bottom(1) 1854 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1855 } 1856 } 1857 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_width_gt_4)1858 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_width_gt_4) { 1859 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1860 for (size_t input_width = 9; input_width < 17; input_width++) { 1861 DWConv2DMicrokernelTester() 1862 .input_width(input_width) 1863 .input_height(4) 1864 .kernel_height(3) 1865 .kernel_width(3) 1866 .subsampling(2) 1867 .padding_left(1) 1868 .padding_right(1) 1869 .padding_top(1) 1870 .padding_bottom(1) 1871 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1872 } 1873 } 1874 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_height_eq_2)1875 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_height_eq_2) { 1876 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1877 for (size_t input_height = 3; input_height < 5; input_height++) { 1878 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1879 DWConv2DMicrokernelTester() 1880 .input_width(input_width) 1881 .input_height(input_height) 1882 .kernel_height(3) 1883 .kernel_width(3) 1884 .subsampling(2) 1885 .padding_left(1) 1886 .padding_right(1) 1887 .padding_top(1) 1888 .padding_bottom(1) 1889 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1890 } 1891 } 1892 } 1893 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_height_div_2)1894 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_height_div_2) { 1895 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1896 for 
(size_t input_height = 8; input_height < 32; input_height += 4) { 1897 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1898 DWConv2DMicrokernelTester() 1899 .input_width(input_width) 1900 .input_height(input_height) 1901 .kernel_height(3) 1902 .kernel_width(3) 1903 .subsampling(2) 1904 .padding_left(1) 1905 .padding_right(1) 1906 .padding_top(1) 1907 .padding_bottom(1) 1908 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1909 } 1910 } 1911 } 1912 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_height_lt_2)1913 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_height_lt_2) { 1914 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1915 for (size_t input_height = 1; input_height < 3; input_height++) { 1916 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1917 DWConv2DMicrokernelTester() 1918 .input_width(input_width) 1919 .input_height(input_height) 1920 .kernel_height(3) 1921 .kernel_width(3) 1922 .subsampling(2) 1923 .padding_left(1) 1924 .padding_right(1) 1925 .padding_top(1) 1926 .padding_bottom(1) 1927 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1928 } 1929 } 1930 } 1931 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,output_height_gt_2)1932 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, output_height_gt_2) { 1933 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1934 for (size_t input_height = 5; input_height < 9; input_height++) { 1935 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1936 DWConv2DMicrokernelTester() 1937 .input_width(input_width) 1938 .input_height(input_height) 1939 .kernel_height(3) 1940 .kernel_width(3) 1941 .subsampling(2) 1942 .padding_left(1) 1943 .padding_right(1) 1944 .padding_top(1) 1945 .padding_bottom(1) 1946 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1947 } 1948 } 1949 } 1950 
TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2,padding_top_eq_1)1951 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_2X4_ACC2, padding_top_eq_1) { 1952 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1953 for (size_t input_height = 2; input_height < 14; input_height++) { 1954 for (size_t input_width = 1; input_width < 41; input_width += 7) { 1955 DWConv2DMicrokernelTester() 1956 .input_width(input_width) 1957 .input_height(input_height) 1958 .kernel_height(3) 1959 .kernel_width(3) 1960 .subsampling(2) 1961 .padding_left(1) 1962 .padding_right(1) 1963 .padding_top(0) 1964 .padding_bottom(1) 1965 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 1966 } 1967 } 1968 } 1969 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1970 1971 1972 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_width_eq_4)1973 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_width_eq_4) { 1974 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1975 for (size_t input_width = 7; input_width < 9; input_width++) { 1976 DWConv2DMicrokernelTester() 1977 .input_width(input_width) 1978 .input_height(6) 1979 .kernel_height(3) 1980 .kernel_width(3) 1981 .subsampling(2) 1982 .padding_left(1) 1983 .padding_right(1) 1984 .padding_top(1) 1985 .padding_bottom(1) 1986 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 1987 } 1988 } 1989 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_width_div_4)1990 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_width_div_4) { 1991 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1992 for (size_t input_width = 16; input_width < 64; input_width += 8) { 1993 DWConv2DMicrokernelTester() 1994 .input_width(input_width) 1995 .input_height(6) 1996 .kernel_height(3) 1997 .kernel_width(3) 1998 .subsampling(2) 1999 .padding_left(1) 2000 .padding_right(1) 2001 .padding_top(1) 2002 .padding_bottom(1) 2003 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, 
xnn_init_f16_chw_params); 2004 } 2005 } 2006 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_width_lt_4)2007 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_width_lt_4) { 2008 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2009 for (size_t input_width = 1; input_width < 7; input_width++) { 2010 DWConv2DMicrokernelTester() 2011 .input_width(8) 2012 .input_height(6) 2013 .kernel_height(3) 2014 .kernel_width(3) 2015 .subsampling(2) 2016 .padding_left(1) 2017 .padding_right(1) 2018 .padding_top(1) 2019 .padding_bottom(1) 2020 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 2021 } 2022 } 2023 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_width_gt_4)2024 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_width_gt_4) { 2025 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2026 for (size_t input_width = 9; input_width < 17; input_width++) { 2027 DWConv2DMicrokernelTester() 2028 .input_width(input_width) 2029 .input_height(6) 2030 .kernel_height(3) 2031 .kernel_width(3) 2032 .subsampling(2) 2033 .padding_left(1) 2034 .padding_right(1) 2035 .padding_top(1) 2036 .padding_bottom(1) 2037 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 2038 } 2039 } 2040 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_height_eq_3)2041 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_height_eq_3) { 2042 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2043 for (size_t input_height = 5; input_height < 7; input_height++) { 2044 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2045 DWConv2DMicrokernelTester() 2046 .input_width(input_width) 2047 .input_height(input_height) 2048 .kernel_height(3) 2049 .kernel_width(3) 2050 .subsampling(2) 2051 .padding_left(1) 2052 .padding_right(1) 2053 .padding_top(1) 2054 .padding_bottom(1) 2055 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 2056 } 2057 } 2058 } 2059 
TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_height_div_3)2060 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_height_div_3) { 2061 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2062 for (size_t input_height = 12; input_height < 48; input_height += 6) { 2063 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2064 DWConv2DMicrokernelTester() 2065 .input_width(input_width) 2066 .input_height(input_height) 2067 .kernel_height(3) 2068 .kernel_width(3) 2069 .subsampling(2) 2070 .padding_left(1) 2071 .padding_right(1) 2072 .padding_top(1) 2073 .padding_bottom(1) 2074 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 2075 } 2076 } 2077 } 2078 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_height_lt_3)2079 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_height_lt_3) { 2080 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2081 for (size_t input_height = 1; input_height < 5; input_height++) { 2082 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2083 DWConv2DMicrokernelTester() 2084 .input_width(input_width) 2085 .input_height(input_height) 2086 .kernel_height(3) 2087 .kernel_width(3) 2088 .subsampling(2) 2089 .padding_left(1) 2090 .padding_right(1) 2091 .padding_top(1) 2092 .padding_bottom(1) 2093 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 2094 } 2095 } 2096 } 2097 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,output_height_gt_3)2098 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, output_height_gt_3) { 2099 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2100 for (size_t input_height = 7; input_height < 13; input_height++) { 2101 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2102 DWConv2DMicrokernelTester() 2103 .input_width(input_width) 2104 .input_height(input_height) 2105 .kernel_height(3) 2106 .kernel_width(3) 2107 .subsampling(2) 2108 .padding_left(1) 2109 .padding_right(1) 2110 .padding_top(1) 2111 .padding_bottom(1) 2112 
.Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 2113 } 2114 } 2115 } 2116 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4,padding_top_eq_1)2117 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_3X4, padding_top_eq_1) { 2118 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2119 for (size_t input_height = 2; input_height < 20; input_height++) { 2120 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2121 DWConv2DMicrokernelTester() 2122 .input_width(input_width) 2123 .input_height(input_height) 2124 .kernel_height(3) 2125 .kernel_width(3) 2126 .subsampling(2) 2127 .padding_left(1) 2128 .padding_right(1) 2129 .padding_top(0) 2130 .padding_bottom(1) 2131 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4, xnn_init_f16_chw_params); 2132 } 2133 } 2134 } 2135 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2136 2137 2138 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_width_eq_4)2139 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_width_eq_4) { 2140 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2141 for (size_t input_width = 7; input_width < 9; input_width++) { 2142 DWConv2DMicrokernelTester() 2143 .input_width(input_width) 2144 .input_height(8) 2145 .kernel_height(3) 2146 .kernel_width(3) 2147 .subsampling(2) 2148 .padding_left(1) 2149 .padding_right(1) 2150 .padding_top(1) 2151 .padding_bottom(1) 2152 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2153 } 2154 } 2155 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_width_div_4)2156 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_width_div_4) { 2157 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2158 for (size_t input_width = 16; input_width < 64; input_width += 8) { 2159 DWConv2DMicrokernelTester() 2160 .input_width(input_width) 2161 .input_height(8) 2162 .kernel_height(3) 2163 .kernel_width(3) 2164 .subsampling(2) 2165 .padding_left(1) 2166 .padding_right(1) 2167 .padding_top(1) 
2168 .padding_bottom(1) 2169 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2170 } 2171 } 2172 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_width_lt_4)2173 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_width_lt_4) { 2174 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2175 for (size_t input_width = 1; input_width < 7; input_width++) { 2176 DWConv2DMicrokernelTester() 2177 .input_width(8) 2178 .input_height(8) 2179 .kernel_height(3) 2180 .kernel_width(3) 2181 .subsampling(2) 2182 .padding_left(1) 2183 .padding_right(1) 2184 .padding_top(1) 2185 .padding_bottom(1) 2186 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2187 } 2188 } 2189 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_width_gt_4)2190 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_width_gt_4) { 2191 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2192 for (size_t input_width = 9; input_width < 17; input_width++) { 2193 DWConv2DMicrokernelTester() 2194 .input_width(input_width) 2195 .input_height(8) 2196 .kernel_height(3) 2197 .kernel_width(3) 2198 .subsampling(2) 2199 .padding_left(1) 2200 .padding_right(1) 2201 .padding_top(1) 2202 .padding_bottom(1) 2203 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2204 } 2205 } 2206 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_height_eq_4)2207 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_height_eq_4) { 2208 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2209 for (size_t input_height = 7; input_height < 9; input_height++) { 2210 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2211 DWConv2DMicrokernelTester() 2212 .input_width(input_width) 2213 .input_height(input_height) 2214 .kernel_height(3) 2215 .kernel_width(3) 2216 .subsampling(2) 2217 .padding_left(1) 2218 .padding_right(1) 2219 .padding_top(1) 2220 .padding_bottom(1) 2221 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, 
xnn_init_f16_chw_params); 2222 } 2223 } 2224 } 2225 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_height_div_4)2226 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_height_div_4) { 2227 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2228 for (size_t input_height = 16; input_height < 64; input_height += 8) { 2229 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2230 DWConv2DMicrokernelTester() 2231 .input_width(input_width) 2232 .input_height(input_height) 2233 .kernel_height(3) 2234 .kernel_width(3) 2235 .subsampling(2) 2236 .padding_left(1) 2237 .padding_right(1) 2238 .padding_top(1) 2239 .padding_bottom(1) 2240 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2241 } 2242 } 2243 } 2244 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_height_lt_4)2245 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_height_lt_4) { 2246 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2247 for (size_t input_height = 1; input_height < 7; input_height++) { 2248 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2249 DWConv2DMicrokernelTester() 2250 .input_width(input_width) 2251 .input_height(input_height) 2252 .kernel_height(3) 2253 .kernel_width(3) 2254 .subsampling(2) 2255 .padding_left(1) 2256 .padding_right(1) 2257 .padding_top(1) 2258 .padding_bottom(1) 2259 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2260 } 2261 } 2262 } 2263 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,output_height_gt_4)2264 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, output_height_gt_4) { 2265 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2266 for (size_t input_height = 9; input_height < 17; input_height++) { 2267 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2268 DWConv2DMicrokernelTester() 2269 .input_width(input_width) 2270 .input_height(input_height) 2271 .kernel_height(3) 2272 .kernel_width(3) 2273 .subsampling(2) 2274 .padding_left(1) 2275 .padding_right(1) 
2276 .padding_top(1) 2277 .padding_bottom(1) 2278 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2279 } 2280 } 2281 } 2282 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4,padding_top_eq_1)2283 TEST(F16_DWCONV2D_CHW_3X3S2P1__NEONFP16ARITH_4X4, padding_top_eq_1) { 2284 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2285 for (size_t input_height = 2; input_height < 26; input_height++) { 2286 for (size_t input_width = 1; input_width < 41; input_width += 7) { 2287 DWConv2DMicrokernelTester() 2288 .input_width(input_width) 2289 .input_height(input_height) 2290 .kernel_height(3) 2291 .kernel_width(3) 2292 .subsampling(2) 2293 .padding_left(1) 2294 .padding_right(1) 2295 .padding_top(0) 2296 .padding_bottom(1) 2297 .Test(xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_4x4, xnn_init_f16_chw_params); 2298 } 2299 } 2300 } 2301 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2302 2303 2304 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4,output_width_eq_4)2305 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4, output_width_eq_4) { 2306 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2307 DWConv2DMicrokernelTester() 2308 .input_width(4) 2309 .input_height(1) 2310 .kernel_height(5) 2311 .kernel_width(5) 2312 .subsampling(1) 2313 .padding_left(2) 2314 .padding_right(2) 2315 .padding_top(2) 2316 .padding_bottom(2) 2317 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 2318 } 2319 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4,output_width_div_4)2320 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4, output_width_div_4) { 2321 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2322 for (size_t input_width = 8; input_width < 32; input_width += 4) { 2323 DWConv2DMicrokernelTester() 2324 .input_width(input_width) 2325 .input_height(1) 2326 .kernel_height(5) 2327 .kernel_width(5) 2328 .subsampling(1) 2329 .padding_left(2) 2330 .padding_right(2) 2331 .padding_top(2) 2332 .padding_bottom(2) 2333 
.Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 2334 } 2335 } 2336 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4,output_width_lt_4)2337 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4, output_width_lt_4) { 2338 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2339 for (size_t input_width = 1; input_width < 4; input_width++) { 2340 DWConv2DMicrokernelTester() 2341 .input_width(4) 2342 .input_height(1) 2343 .kernel_height(5) 2344 .kernel_width(5) 2345 .subsampling(1) 2346 .padding_left(2) 2347 .padding_right(2) 2348 .padding_top(2) 2349 .padding_bottom(2) 2350 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 2351 } 2352 } 2353 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4,output_width_gt_4)2354 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4, output_width_gt_4) { 2355 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2356 for (size_t input_width = 5; input_width < 9; input_width++) { 2357 DWConv2DMicrokernelTester() 2358 .input_width(input_width) 2359 .input_height(1) 2360 .kernel_height(5) 2361 .kernel_width(5) 2362 .subsampling(1) 2363 .padding_left(2) 2364 .padding_right(2) 2365 .padding_top(2) 2366 .padding_bottom(2) 2367 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 2368 } 2369 } 2370 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4,output_height_gt_1)2371 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4, output_height_gt_1) { 2372 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2373 for (size_t input_height = 2; input_height < 3; input_height++) { 2374 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2375 DWConv2DMicrokernelTester() 2376 .input_width(input_width) 2377 .input_height(input_height) 2378 .kernel_height(5) 2379 .kernel_width(5) 2380 .subsampling(1) 2381 .padding_left(2) 2382 .padding_right(2) 2383 .padding_top(2) 2384 .padding_bottom(2) 2385 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 2386 } 2387 } 2388 } 2389 #endif 
// XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2390 2391 2392 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2,output_width_eq_4)2393 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2, output_width_eq_4) { 2394 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2395 DWConv2DMicrokernelTester() 2396 .input_width(4) 2397 .input_height(1) 2398 .kernel_height(5) 2399 .kernel_width(5) 2400 .subsampling(1) 2401 .padding_left(2) 2402 .padding_right(2) 2403 .padding_top(2) 2404 .padding_bottom(2) 2405 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 2406 } 2407 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2,output_width_div_4)2408 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2, output_width_div_4) { 2409 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2410 for (size_t input_width = 8; input_width < 32; input_width += 4) { 2411 DWConv2DMicrokernelTester() 2412 .input_width(input_width) 2413 .input_height(1) 2414 .kernel_height(5) 2415 .kernel_width(5) 2416 .subsampling(1) 2417 .padding_left(2) 2418 .padding_right(2) 2419 .padding_top(2) 2420 .padding_bottom(2) 2421 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 2422 } 2423 } 2424 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2,output_width_lt_4)2425 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2, output_width_lt_4) { 2426 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2427 for (size_t input_width = 1; input_width < 4; input_width++) { 2428 DWConv2DMicrokernelTester() 2429 .input_width(4) 2430 .input_height(1) 2431 .kernel_height(5) 2432 .kernel_width(5) 2433 .subsampling(1) 2434 .padding_left(2) 2435 .padding_right(2) 2436 .padding_top(2) 2437 .padding_bottom(2) 2438 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 2439 } 2440 } 2441 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2,output_width_gt_4)2442 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2, 
output_width_gt_4) { 2443 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2444 for (size_t input_width = 5; input_width < 9; input_width++) { 2445 DWConv2DMicrokernelTester() 2446 .input_width(input_width) 2447 .input_height(1) 2448 .kernel_height(5) 2449 .kernel_width(5) 2450 .subsampling(1) 2451 .padding_left(2) 2452 .padding_right(2) 2453 .padding_top(2) 2454 .padding_bottom(2) 2455 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 2456 } 2457 } 2458 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2,output_height_gt_1)2459 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC2, output_height_gt_1) { 2460 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2461 for (size_t input_height = 2; input_height < 3; input_height++) { 2462 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2463 DWConv2DMicrokernelTester() 2464 .input_width(input_width) 2465 .input_height(input_height) 2466 .kernel_height(5) 2467 .kernel_width(5) 2468 .subsampling(1) 2469 .padding_left(2) 2470 .padding_right(2) 2471 .padding_top(2) 2472 .padding_bottom(2) 2473 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 2474 } 2475 } 2476 } 2477 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2478 2479 2480 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3,output_width_eq_4)2481 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3, output_width_eq_4) { 2482 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2483 DWConv2DMicrokernelTester() 2484 .input_width(4) 2485 .input_height(1) 2486 .kernel_height(5) 2487 .kernel_width(5) 2488 .subsampling(1) 2489 .padding_left(2) 2490 .padding_right(2) 2491 .padding_top(2) 2492 .padding_bottom(2) 2493 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 2494 } 2495 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3,output_width_div_4)2496 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3, output_width_div_4) { 2497 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2498 for (size_t input_width = 8; input_width < 32; input_width += 4) { 2499 DWConv2DMicrokernelTester() 2500 .input_width(input_width) 2501 .input_height(1) 2502 .kernel_height(5) 2503 .kernel_width(5) 2504 .subsampling(1) 2505 .padding_left(2) 2506 .padding_right(2) 2507 .padding_top(2) 2508 .padding_bottom(2) 2509 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 2510 } 2511 } 2512 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3,output_width_lt_4)2513 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3, output_width_lt_4) { 2514 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2515 for (size_t input_width = 1; input_width < 4; input_width++) { 2516 DWConv2DMicrokernelTester() 2517 .input_width(4) 2518 .input_height(1) 2519 .kernel_height(5) 2520 .kernel_width(5) 2521 .subsampling(1) 2522 .padding_left(2) 2523 .padding_right(2) 2524 .padding_top(2) 2525 .padding_bottom(2) 2526 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 2527 } 2528 } 2529 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3,output_width_gt_4)2530 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3, output_width_gt_4) { 2531 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2532 for (size_t input_width = 5; input_width < 9; input_width++) { 2533 DWConv2DMicrokernelTester() 2534 .input_width(input_width) 2535 .input_height(1) 2536 .kernel_height(5) 2537 .kernel_width(5) 2538 .subsampling(1) 2539 .padding_left(2) 2540 .padding_right(2) 2541 .padding_top(2) 2542 .padding_bottom(2) 2543 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 2544 } 2545 } 2546 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3,output_height_gt_1)2547 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC3, output_height_gt_1) { 2548 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2549 for (size_t input_height = 2; input_height < 3; input_height++) { 2550 for (size_t input_width = 1; input_width < 21; 
input_width += 3) { 2551 DWConv2DMicrokernelTester() 2552 .input_width(input_width) 2553 .input_height(input_height) 2554 .kernel_height(5) 2555 .kernel_width(5) 2556 .subsampling(1) 2557 .padding_left(2) 2558 .padding_right(2) 2559 .padding_top(2) 2560 .padding_bottom(2) 2561 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 2562 } 2563 } 2564 } 2565 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2566 2567 2568 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4,output_width_eq_4)2569 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4, output_width_eq_4) { 2570 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2571 DWConv2DMicrokernelTester() 2572 .input_width(4) 2573 .input_height(1) 2574 .kernel_height(5) 2575 .kernel_width(5) 2576 .subsampling(1) 2577 .padding_left(2) 2578 .padding_right(2) 2579 .padding_top(2) 2580 .padding_bottom(2) 2581 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 2582 } 2583 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4,output_width_div_4)2584 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4, output_width_div_4) { 2585 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2586 for (size_t input_width = 8; input_width < 32; input_width += 4) { 2587 DWConv2DMicrokernelTester() 2588 .input_width(input_width) 2589 .input_height(1) 2590 .kernel_height(5) 2591 .kernel_width(5) 2592 .subsampling(1) 2593 .padding_left(2) 2594 .padding_right(2) 2595 .padding_top(2) 2596 .padding_bottom(2) 2597 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 2598 } 2599 } 2600 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4,output_width_lt_4)2601 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4, output_width_lt_4) { 2602 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2603 for (size_t input_width = 1; input_width < 4; input_width++) { 2604 DWConv2DMicrokernelTester() 2605 .input_width(4) 2606 .input_height(1) 2607 
.kernel_height(5) 2608 .kernel_width(5) 2609 .subsampling(1) 2610 .padding_left(2) 2611 .padding_right(2) 2612 .padding_top(2) 2613 .padding_bottom(2) 2614 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 2615 } 2616 } 2617 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4,output_width_gt_4)2618 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4, output_width_gt_4) { 2619 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2620 for (size_t input_width = 5; input_width < 9; input_width++) { 2621 DWConv2DMicrokernelTester() 2622 .input_width(input_width) 2623 .input_height(1) 2624 .kernel_height(5) 2625 .kernel_width(5) 2626 .subsampling(1) 2627 .padding_left(2) 2628 .padding_right(2) 2629 .padding_top(2) 2630 .padding_bottom(2) 2631 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 2632 } 2633 } 2634 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4,output_height_gt_1)2635 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC4, output_height_gt_1) { 2636 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2637 for (size_t input_height = 2; input_height < 3; input_height++) { 2638 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2639 DWConv2DMicrokernelTester() 2640 .input_width(input_width) 2641 .input_height(input_height) 2642 .kernel_height(5) 2643 .kernel_width(5) 2644 .subsampling(1) 2645 .padding_left(2) 2646 .padding_right(2) 2647 .padding_top(2) 2648 .padding_bottom(2) 2649 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 2650 } 2651 } 2652 } 2653 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2654 2655 2656 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5,output_width_eq_4)2657 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5, output_width_eq_4) { 2658 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2659 DWConv2DMicrokernelTester() 2660 .input_width(4) 2661 .input_height(1) 2662 .kernel_height(5) 2663 
.kernel_width(5) 2664 .subsampling(1) 2665 .padding_left(2) 2666 .padding_right(2) 2667 .padding_top(2) 2668 .padding_bottom(2) 2669 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 2670 } 2671 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5,output_width_div_4)2672 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5, output_width_div_4) { 2673 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2674 for (size_t input_width = 8; input_width < 32; input_width += 4) { 2675 DWConv2DMicrokernelTester() 2676 .input_width(input_width) 2677 .input_height(1) 2678 .kernel_height(5) 2679 .kernel_width(5) 2680 .subsampling(1) 2681 .padding_left(2) 2682 .padding_right(2) 2683 .padding_top(2) 2684 .padding_bottom(2) 2685 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 2686 } 2687 } 2688 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5,output_width_lt_4)2689 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5, output_width_lt_4) { 2690 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2691 for (size_t input_width = 1; input_width < 4; input_width++) { 2692 DWConv2DMicrokernelTester() 2693 .input_width(4) 2694 .input_height(1) 2695 .kernel_height(5) 2696 .kernel_width(5) 2697 .subsampling(1) 2698 .padding_left(2) 2699 .padding_right(2) 2700 .padding_top(2) 2701 .padding_bottom(2) 2702 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 2703 } 2704 } 2705 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5,output_width_gt_4)2706 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5, output_width_gt_4) { 2707 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2708 for (size_t input_width = 5; input_width < 9; input_width++) { 2709 DWConv2DMicrokernelTester() 2710 .input_width(input_width) 2711 .input_height(1) 2712 .kernel_height(5) 2713 .kernel_width(5) 2714 .subsampling(1) 2715 .padding_left(2) 2716 .padding_right(2) 2717 .padding_top(2) 2718 .padding_bottom(2) 2719 
.Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 2720 } 2721 } 2722 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5,output_height_gt_1)2723 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_1X4_ACC5, output_height_gt_1) { 2724 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2725 for (size_t input_height = 2; input_height < 3; input_height++) { 2726 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2727 DWConv2DMicrokernelTester() 2728 .input_width(input_width) 2729 .input_height(input_height) 2730 .kernel_height(5) 2731 .kernel_width(5) 2732 .subsampling(1) 2733 .padding_left(2) 2734 .padding_right(2) 2735 .padding_top(2) 2736 .padding_bottom(2) 2737 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 2738 } 2739 } 2740 } 2741 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2742 2743 2744 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4,output_width_eq_4)2745 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4, output_width_eq_4) { 2746 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2747 DWConv2DMicrokernelTester() 2748 .input_width(4) 2749 .input_height(2) 2750 .kernel_height(5) 2751 .kernel_width(5) 2752 .subsampling(1) 2753 .padding_left(2) 2754 .padding_right(2) 2755 .padding_top(2) 2756 .padding_bottom(2) 2757 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 2758 } 2759 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4,output_width_div_4)2760 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4, output_width_div_4) { 2761 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2762 for (size_t input_width = 8; input_width < 32; input_width += 4) { 2763 DWConv2DMicrokernelTester() 2764 .input_width(input_width) 2765 .input_height(2) 2766 .kernel_height(5) 2767 .kernel_width(5) 2768 .subsampling(1) 2769 .padding_left(2) 2770 .padding_right(2) 2771 .padding_top(2) 2772 .padding_bottom(2) 2773 
.Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 2774 } 2775 } 2776 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4,output_width_lt_4)2777 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4, output_width_lt_4) { 2778 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2779 for (size_t input_width = 1; input_width < 4; input_width++) { 2780 DWConv2DMicrokernelTester() 2781 .input_width(4) 2782 .input_height(2) 2783 .kernel_height(5) 2784 .kernel_width(5) 2785 .subsampling(1) 2786 .padding_left(2) 2787 .padding_right(2) 2788 .padding_top(2) 2789 .padding_bottom(2) 2790 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 2791 } 2792 } 2793 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4,output_width_gt_4)2794 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4, output_width_gt_4) { 2795 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2796 for (size_t input_width = 5; input_width < 9; input_width++) { 2797 DWConv2DMicrokernelTester() 2798 .input_width(input_width) 2799 .input_height(2) 2800 .kernel_height(5) 2801 .kernel_width(5) 2802 .subsampling(1) 2803 .padding_left(2) 2804 .padding_right(2) 2805 .padding_top(2) 2806 .padding_bottom(2) 2807 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 2808 } 2809 } 2810 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4,output_height_div_2)2811 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4, output_height_div_2) { 2812 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2813 for (size_t input_height = 4; input_height < 16; input_height += 2) { 2814 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2815 DWConv2DMicrokernelTester() 2816 .input_width(input_width) 2817 .input_height(input_height) 2818 .kernel_height(5) 2819 .kernel_width(5) 2820 .subsampling(1) 2821 .padding_left(2) 2822 .padding_right(2) 2823 .padding_top(2) 2824 .padding_bottom(2) 2825 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 2826 } 2827 } 2828 } 2829 
TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4,output_height_lt_2)2830 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4, output_height_lt_2) { 2831 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2832 for (size_t input_height = 1; input_height < 2; input_height++) { 2833 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2834 DWConv2DMicrokernelTester() 2835 .input_width(input_width) 2836 .input_height(input_height) 2837 .kernel_height(5) 2838 .kernel_width(5) 2839 .subsampling(1) 2840 .padding_left(2) 2841 .padding_right(2) 2842 .padding_top(2) 2843 .padding_bottom(2) 2844 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 2845 } 2846 } 2847 } 2848 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4,output_height_gt_2)2849 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4, output_height_gt_2) { 2850 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2851 for (size_t input_height = 3; input_height < 5; input_height++) { 2852 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2853 DWConv2DMicrokernelTester() 2854 .input_width(input_width) 2855 .input_height(input_height) 2856 .kernel_height(5) 2857 .kernel_width(5) 2858 .subsampling(1) 2859 .padding_left(2) 2860 .padding_right(2) 2861 .padding_top(2) 2862 .padding_bottom(2) 2863 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 2864 } 2865 } 2866 } 2867 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2868 2869 2870 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2,output_width_eq_4)2871 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2, output_width_eq_4) { 2872 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2873 DWConv2DMicrokernelTester() 2874 .input_width(4) 2875 .input_height(2) 2876 .kernel_height(5) 2877 .kernel_width(5) 2878 .subsampling(1) 2879 .padding_left(2) 2880 .padding_right(2) 2881 .padding_top(2) 2882 .padding_bottom(2) 2883 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc2, 
xnn_init_f16_chw_params); 2884 } 2885 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2,output_width_div_4)2886 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2, output_width_div_4) { 2887 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2888 for (size_t input_width = 8; input_width < 32; input_width += 4) { 2889 DWConv2DMicrokernelTester() 2890 .input_width(input_width) 2891 .input_height(2) 2892 .kernel_height(5) 2893 .kernel_width(5) 2894 .subsampling(1) 2895 .padding_left(2) 2896 .padding_right(2) 2897 .padding_top(2) 2898 .padding_bottom(2) 2899 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 2900 } 2901 } 2902 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2,output_width_lt_4)2903 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2, output_width_lt_4) { 2904 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2905 for (size_t input_width = 1; input_width < 4; input_width++) { 2906 DWConv2DMicrokernelTester() 2907 .input_width(4) 2908 .input_height(2) 2909 .kernel_height(5) 2910 .kernel_width(5) 2911 .subsampling(1) 2912 .padding_left(2) 2913 .padding_right(2) 2914 .padding_top(2) 2915 .padding_bottom(2) 2916 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 2917 } 2918 } 2919 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2,output_width_gt_4)2920 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2, output_width_gt_4) { 2921 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2922 for (size_t input_width = 5; input_width < 9; input_width++) { 2923 DWConv2DMicrokernelTester() 2924 .input_width(input_width) 2925 .input_height(2) 2926 .kernel_height(5) 2927 .kernel_width(5) 2928 .subsampling(1) 2929 .padding_left(2) 2930 .padding_right(2) 2931 .padding_top(2) 2932 .padding_bottom(2) 2933 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 2934 } 2935 } 2936 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2,output_height_div_2)2937 
TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2, output_height_div_2) { 2938 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2939 for (size_t input_height = 4; input_height < 16; input_height += 2) { 2940 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2941 DWConv2DMicrokernelTester() 2942 .input_width(input_width) 2943 .input_height(input_height) 2944 .kernel_height(5) 2945 .kernel_width(5) 2946 .subsampling(1) 2947 .padding_left(2) 2948 .padding_right(2) 2949 .padding_top(2) 2950 .padding_bottom(2) 2951 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 2952 } 2953 } 2954 } 2955 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2,output_height_lt_2)2956 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2, output_height_lt_2) { 2957 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2958 for (size_t input_height = 1; input_height < 2; input_height++) { 2959 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2960 DWConv2DMicrokernelTester() 2961 .input_width(input_width) 2962 .input_height(input_height) 2963 .kernel_height(5) 2964 .kernel_width(5) 2965 .subsampling(1) 2966 .padding_left(2) 2967 .padding_right(2) 2968 .padding_top(2) 2969 .padding_bottom(2) 2970 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 2971 } 2972 } 2973 } 2974 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2,output_height_gt_2)2975 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC2, output_height_gt_2) { 2976 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2977 for (size_t input_height = 3; input_height < 5; input_height++) { 2978 for (size_t input_width = 1; input_width < 21; input_width += 3) { 2979 DWConv2DMicrokernelTester() 2980 .input_width(input_width) 2981 .input_height(input_height) 2982 .kernel_height(5) 2983 .kernel_width(5) 2984 .subsampling(1) 2985 .padding_left(2) 2986 .padding_right(2) 2987 .padding_top(2) 2988 .padding_bottom(2) 2989 
.Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 2990 } 2991 } 2992 } 2993 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 2994 2995 2996 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3,output_width_eq_4)2997 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3, output_width_eq_4) { 2998 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2999 DWConv2DMicrokernelTester() 3000 .input_width(4) 3001 .input_height(2) 3002 .kernel_height(5) 3003 .kernel_width(5) 3004 .subsampling(1) 3005 .padding_left(2) 3006 .padding_right(2) 3007 .padding_top(2) 3008 .padding_bottom(2) 3009 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 3010 } 3011 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3,output_width_div_4)3012 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3, output_width_div_4) { 3013 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3014 for (size_t input_width = 8; input_width < 32; input_width += 4) { 3015 DWConv2DMicrokernelTester() 3016 .input_width(input_width) 3017 .input_height(2) 3018 .kernel_height(5) 3019 .kernel_width(5) 3020 .subsampling(1) 3021 .padding_left(2) 3022 .padding_right(2) 3023 .padding_top(2) 3024 .padding_bottom(2) 3025 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 3026 } 3027 } 3028 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3,output_width_lt_4)3029 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3, output_width_lt_4) { 3030 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3031 for (size_t input_width = 1; input_width < 4; input_width++) { 3032 DWConv2DMicrokernelTester() 3033 .input_width(4) 3034 .input_height(2) 3035 .kernel_height(5) 3036 .kernel_width(5) 3037 .subsampling(1) 3038 .padding_left(2) 3039 .padding_right(2) 3040 .padding_top(2) 3041 .padding_bottom(2) 3042 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 3043 } 3044 } 3045 
TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3,output_width_gt_4)3046 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3, output_width_gt_4) { 3047 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3048 for (size_t input_width = 5; input_width < 9; input_width++) { 3049 DWConv2DMicrokernelTester() 3050 .input_width(input_width) 3051 .input_height(2) 3052 .kernel_height(5) 3053 .kernel_width(5) 3054 .subsampling(1) 3055 .padding_left(2) 3056 .padding_right(2) 3057 .padding_top(2) 3058 .padding_bottom(2) 3059 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 3060 } 3061 } 3062 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3,output_height_div_2)3063 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3, output_height_div_2) { 3064 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3065 for (size_t input_height = 4; input_height < 16; input_height += 2) { 3066 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3067 DWConv2DMicrokernelTester() 3068 .input_width(input_width) 3069 .input_height(input_height) 3070 .kernel_height(5) 3071 .kernel_width(5) 3072 .subsampling(1) 3073 .padding_left(2) 3074 .padding_right(2) 3075 .padding_top(2) 3076 .padding_bottom(2) 3077 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 3078 } 3079 } 3080 } 3081 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3,output_height_lt_2)3082 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3, output_height_lt_2) { 3083 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3084 for (size_t input_height = 1; input_height < 2; input_height++) { 3085 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3086 DWConv2DMicrokernelTester() 3087 .input_width(input_width) 3088 .input_height(input_height) 3089 .kernel_height(5) 3090 .kernel_width(5) 3091 .subsampling(1) 3092 .padding_left(2) 3093 .padding_right(2) 3094 .padding_top(2) 3095 .padding_bottom(2) 3096 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3, 
xnn_init_f16_chw_params); 3097 } 3098 } 3099 } 3100 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3,output_height_gt_2)3101 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_2X4_ACC3, output_height_gt_2) { 3102 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3103 for (size_t input_height = 3; input_height < 5; input_height++) { 3104 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3105 DWConv2DMicrokernelTester() 3106 .input_width(input_width) 3107 .input_height(input_height) 3108 .kernel_height(5) 3109 .kernel_width(5) 3110 .subsampling(1) 3111 .padding_left(2) 3112 .padding_right(2) 3113 .padding_top(2) 3114 .padding_bottom(2) 3115 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 3116 } 3117 } 3118 } 3119 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 3120 3121 3122 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4,output_width_eq_4)3123 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4, output_width_eq_4) { 3124 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3125 DWConv2DMicrokernelTester() 3126 .input_width(4) 3127 .input_height(3) 3128 .kernel_height(5) 3129 .kernel_width(5) 3130 .subsampling(1) 3131 .padding_left(2) 3132 .padding_right(2) 3133 .padding_top(2) 3134 .padding_bottom(2) 3135 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 3136 } 3137 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4,output_width_div_4)3138 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4, output_width_div_4) { 3139 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3140 for (size_t input_width = 8; input_width < 32; input_width += 4) { 3141 DWConv2DMicrokernelTester() 3142 .input_width(input_width) 3143 .input_height(3) 3144 .kernel_height(5) 3145 .kernel_width(5) 3146 .subsampling(1) 3147 .padding_left(2) 3148 .padding_right(2) 3149 .padding_top(2) 3150 .padding_bottom(2) 3151 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 3152 } 3153 } 3154 
TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4,output_width_lt_4)3155 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4, output_width_lt_4) { 3156 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3157 for (size_t input_width = 1; input_width < 4; input_width++) { 3158 DWConv2DMicrokernelTester() 3159 .input_width(4) 3160 .input_height(3) 3161 .kernel_height(5) 3162 .kernel_width(5) 3163 .subsampling(1) 3164 .padding_left(2) 3165 .padding_right(2) 3166 .padding_top(2) 3167 .padding_bottom(2) 3168 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 3169 } 3170 } 3171 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4,output_width_gt_4)3172 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4, output_width_gt_4) { 3173 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3174 for (size_t input_width = 5; input_width < 9; input_width++) { 3175 DWConv2DMicrokernelTester() 3176 .input_width(input_width) 3177 .input_height(3) 3178 .kernel_height(5) 3179 .kernel_width(5) 3180 .subsampling(1) 3181 .padding_left(2) 3182 .padding_right(2) 3183 .padding_top(2) 3184 .padding_bottom(2) 3185 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 3186 } 3187 } 3188 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4,output_height_div_3)3189 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4, output_height_div_3) { 3190 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3191 for (size_t input_height = 6; input_height < 24; input_height += 3) { 3192 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3193 DWConv2DMicrokernelTester() 3194 .input_width(input_width) 3195 .input_height(input_height) 3196 .kernel_height(5) 3197 .kernel_width(5) 3198 .subsampling(1) 3199 .padding_left(2) 3200 .padding_right(2) 3201 .padding_top(2) 3202 .padding_bottom(2) 3203 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 3204 } 3205 } 3206 } 3207 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4,output_height_lt_3)3208 
TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4, output_height_lt_3) { 3209 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3210 for (size_t input_height = 1; input_height < 3; input_height++) { 3211 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3212 DWConv2DMicrokernelTester() 3213 .input_width(input_width) 3214 .input_height(input_height) 3215 .kernel_height(5) 3216 .kernel_width(5) 3217 .subsampling(1) 3218 .padding_left(2) 3219 .padding_right(2) 3220 .padding_top(2) 3221 .padding_bottom(2) 3222 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 3223 } 3224 } 3225 } 3226 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4,output_height_gt_3)3227 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4, output_height_gt_3) { 3228 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3229 for (size_t input_height = 4; input_height < 7; input_height++) { 3230 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3231 DWConv2DMicrokernelTester() 3232 .input_width(input_width) 3233 .input_height(input_height) 3234 .kernel_height(5) 3235 .kernel_width(5) 3236 .subsampling(1) 3237 .padding_left(2) 3238 .padding_right(2) 3239 .padding_top(2) 3240 .padding_bottom(2) 3241 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 3242 } 3243 } 3244 } 3245 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 3246 3247 3248 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2,output_width_eq_4)3249 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2, output_width_eq_4) { 3250 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3251 DWConv2DMicrokernelTester() 3252 .input_width(4) 3253 .input_height(3) 3254 .kernel_height(5) 3255 .kernel_width(5) 3256 .subsampling(1) 3257 .padding_left(2) 3258 .padding_right(2) 3259 .padding_top(2) 3260 .padding_bottom(2) 3261 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 3262 } 3263 
TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2,output_width_div_4)3264 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2, output_width_div_4) { 3265 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3266 for (size_t input_width = 8; input_width < 32; input_width += 4) { 3267 DWConv2DMicrokernelTester() 3268 .input_width(input_width) 3269 .input_height(3) 3270 .kernel_height(5) 3271 .kernel_width(5) 3272 .subsampling(1) 3273 .padding_left(2) 3274 .padding_right(2) 3275 .padding_top(2) 3276 .padding_bottom(2) 3277 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 3278 } 3279 } 3280 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2,output_width_lt_4)3281 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2, output_width_lt_4) { 3282 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3283 for (size_t input_width = 1; input_width < 4; input_width++) { 3284 DWConv2DMicrokernelTester() 3285 .input_width(4) 3286 .input_height(3) 3287 .kernel_height(5) 3288 .kernel_width(5) 3289 .subsampling(1) 3290 .padding_left(2) 3291 .padding_right(2) 3292 .padding_top(2) 3293 .padding_bottom(2) 3294 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 3295 } 3296 } 3297 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2,output_width_gt_4)3298 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2, output_width_gt_4) { 3299 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3300 for (size_t input_width = 5; input_width < 9; input_width++) { 3301 DWConv2DMicrokernelTester() 3302 .input_width(input_width) 3303 .input_height(3) 3304 .kernel_height(5) 3305 .kernel_width(5) 3306 .subsampling(1) 3307 .padding_left(2) 3308 .padding_right(2) 3309 .padding_top(2) 3310 .padding_bottom(2) 3311 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 3312 } 3313 } 3314 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2,output_height_div_3)3315 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2, output_height_div_3) { 3316 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3317 for (size_t input_height = 6; input_height < 24; input_height += 3) { 3318 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3319 DWConv2DMicrokernelTester() 3320 .input_width(input_width) 3321 .input_height(input_height) 3322 .kernel_height(5) 3323 .kernel_width(5) 3324 .subsampling(1) 3325 .padding_left(2) 3326 .padding_right(2) 3327 .padding_top(2) 3328 .padding_bottom(2) 3329 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 3330 } 3331 } 3332 } 3333 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2,output_height_lt_3)3334 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2, output_height_lt_3) { 3335 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3336 for (size_t input_height = 1; input_height < 3; input_height++) { 3337 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3338 DWConv2DMicrokernelTester() 3339 .input_width(input_width) 3340 .input_height(input_height) 3341 .kernel_height(5) 3342 .kernel_width(5) 3343 .subsampling(1) 3344 .padding_left(2) 3345 .padding_right(2) 3346 .padding_top(2) 3347 .padding_bottom(2) 3348 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 3349 } 3350 } 3351 } 3352 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2,output_height_gt_3)3353 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_3X4_ACC2, output_height_gt_3) { 3354 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3355 for (size_t input_height = 4; input_height < 7; input_height++) { 3356 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3357 DWConv2DMicrokernelTester() 3358 .input_width(input_width) 3359 .input_height(input_height) 3360 .kernel_height(5) 3361 .kernel_width(5) 3362 .subsampling(1) 3363 .padding_left(2) 3364 .padding_right(2) 3365 .padding_top(2) 3366 .padding_bottom(2) 3367 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 3368 } 3369 } 3370 } 3371 #endif // 
XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 3372 3373 3374 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4,output_width_eq_4)3375 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4, output_width_eq_4) { 3376 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3377 DWConv2DMicrokernelTester() 3378 .input_width(4) 3379 .input_height(4) 3380 .kernel_height(5) 3381 .kernel_width(5) 3382 .subsampling(1) 3383 .padding_left(2) 3384 .padding_right(2) 3385 .padding_top(2) 3386 .padding_bottom(2) 3387 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4, xnn_init_f16_chw_params); 3388 } 3389 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4,output_width_div_4)3390 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4, output_width_div_4) { 3391 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3392 for (size_t input_width = 8; input_width < 32; input_width += 4) { 3393 DWConv2DMicrokernelTester() 3394 .input_width(input_width) 3395 .input_height(4) 3396 .kernel_height(5) 3397 .kernel_width(5) 3398 .subsampling(1) 3399 .padding_left(2) 3400 .padding_right(2) 3401 .padding_top(2) 3402 .padding_bottom(2) 3403 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4, xnn_init_f16_chw_params); 3404 } 3405 } 3406 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4,output_width_lt_4)3407 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4, output_width_lt_4) { 3408 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3409 for (size_t input_width = 1; input_width < 4; input_width++) { 3410 DWConv2DMicrokernelTester() 3411 .input_width(4) 3412 .input_height(4) 3413 .kernel_height(5) 3414 .kernel_width(5) 3415 .subsampling(1) 3416 .padding_left(2) 3417 .padding_right(2) 3418 .padding_top(2) 3419 .padding_bottom(2) 3420 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4, xnn_init_f16_chw_params); 3421 } 3422 } 3423 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4,output_width_gt_4)3424 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4, output_width_gt_4) { 3425 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3426 for 
(size_t input_width = 5; input_width < 9; input_width++) { 3427 DWConv2DMicrokernelTester() 3428 .input_width(input_width) 3429 .input_height(4) 3430 .kernel_height(5) 3431 .kernel_width(5) 3432 .subsampling(1) 3433 .padding_left(2) 3434 .padding_right(2) 3435 .padding_top(2) 3436 .padding_bottom(2) 3437 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4, xnn_init_f16_chw_params); 3438 } 3439 } 3440 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4,output_height_div_4)3441 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4, output_height_div_4) { 3442 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3443 for (size_t input_height = 8; input_height < 32; input_height += 4) { 3444 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3445 DWConv2DMicrokernelTester() 3446 .input_width(input_width) 3447 .input_height(input_height) 3448 .kernel_height(5) 3449 .kernel_width(5) 3450 .subsampling(1) 3451 .padding_left(2) 3452 .padding_right(2) 3453 .padding_top(2) 3454 .padding_bottom(2) 3455 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4, xnn_init_f16_chw_params); 3456 } 3457 } 3458 } 3459 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4,output_height_lt_4)3460 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4, output_height_lt_4) { 3461 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3462 for (size_t input_height = 1; input_height < 4; input_height++) { 3463 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3464 DWConv2DMicrokernelTester() 3465 .input_width(input_width) 3466 .input_height(input_height) 3467 .kernel_height(5) 3468 .kernel_width(5) 3469 .subsampling(1) 3470 .padding_left(2) 3471 .padding_right(2) 3472 .padding_top(2) 3473 .padding_bottom(2) 3474 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4, xnn_init_f16_chw_params); 3475 } 3476 } 3477 } 3478 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4,output_height_gt_4)3479 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4, output_height_gt_4) { 3480 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3481 for 
(size_t input_height = 5; input_height < 9; input_height++) { 3482 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3483 DWConv2DMicrokernelTester() 3484 .input_width(input_width) 3485 .input_height(input_height) 3486 .kernel_height(5) 3487 .kernel_width(5) 3488 .subsampling(1) 3489 .padding_left(2) 3490 .padding_right(2) 3491 .padding_top(2) 3492 .padding_bottom(2) 3493 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4, xnn_init_f16_chw_params); 3494 } 3495 } 3496 } 3497 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 3498 3499 3500 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2,output_width_eq_4)3501 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2, output_width_eq_4) { 3502 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3503 DWConv2DMicrokernelTester() 3504 .input_width(4) 3505 .input_height(4) 3506 .kernel_height(5) 3507 .kernel_width(5) 3508 .subsampling(1) 3509 .padding_left(2) 3510 .padding_right(2) 3511 .padding_top(2) 3512 .padding_bottom(2) 3513 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2, xnn_init_f16_chw_params); 3514 } 3515 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2,output_width_div_4)3516 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2, output_width_div_4) { 3517 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3518 for (size_t input_width = 8; input_width < 32; input_width += 4) { 3519 DWConv2DMicrokernelTester() 3520 .input_width(input_width) 3521 .input_height(4) 3522 .kernel_height(5) 3523 .kernel_width(5) 3524 .subsampling(1) 3525 .padding_left(2) 3526 .padding_right(2) 3527 .padding_top(2) 3528 .padding_bottom(2) 3529 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2, xnn_init_f16_chw_params); 3530 } 3531 } 3532 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2,output_width_lt_4)3533 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2, output_width_lt_4) { 3534 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3535 for (size_t input_width = 1; input_width < 
4; input_width++) { 3536 DWConv2DMicrokernelTester() 3537 .input_width(4) 3538 .input_height(4) 3539 .kernel_height(5) 3540 .kernel_width(5) 3541 .subsampling(1) 3542 .padding_left(2) 3543 .padding_right(2) 3544 .padding_top(2) 3545 .padding_bottom(2) 3546 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2, xnn_init_f16_chw_params); 3547 } 3548 } 3549 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2,output_width_gt_4)3550 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2, output_width_gt_4) { 3551 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3552 for (size_t input_width = 5; input_width < 9; input_width++) { 3553 DWConv2DMicrokernelTester() 3554 .input_width(input_width) 3555 .input_height(4) 3556 .kernel_height(5) 3557 .kernel_width(5) 3558 .subsampling(1) 3559 .padding_left(2) 3560 .padding_right(2) 3561 .padding_top(2) 3562 .padding_bottom(2) 3563 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2, xnn_init_f16_chw_params); 3564 } 3565 } 3566 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2,output_height_div_4)3567 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2, output_height_div_4) { 3568 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3569 for (size_t input_height = 8; input_height < 32; input_height += 4) { 3570 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3571 DWConv2DMicrokernelTester() 3572 .input_width(input_width) 3573 .input_height(input_height) 3574 .kernel_height(5) 3575 .kernel_width(5) 3576 .subsampling(1) 3577 .padding_left(2) 3578 .padding_right(2) 3579 .padding_top(2) 3580 .padding_bottom(2) 3581 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2, xnn_init_f16_chw_params); 3582 } 3583 } 3584 } 3585 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2,output_height_lt_4)3586 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2, output_height_lt_4) { 3587 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3588 for (size_t input_height = 1; input_height < 4; input_height++) { 3589 for (size_t input_width = 1; 
input_width < 21; input_width += 3) { 3590 DWConv2DMicrokernelTester() 3591 .input_width(input_width) 3592 .input_height(input_height) 3593 .kernel_height(5) 3594 .kernel_width(5) 3595 .subsampling(1) 3596 .padding_left(2) 3597 .padding_right(2) 3598 .padding_top(2) 3599 .padding_bottom(2) 3600 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2, xnn_init_f16_chw_params); 3601 } 3602 } 3603 } 3604 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2,output_height_gt_4)3605 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_4X4_ACC2, output_height_gt_4) { 3606 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3607 for (size_t input_height = 5; input_height < 9; input_height++) { 3608 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3609 DWConv2DMicrokernelTester() 3610 .input_width(input_width) 3611 .input_height(input_height) 3612 .kernel_height(5) 3613 .kernel_width(5) 3614 .subsampling(1) 3615 .padding_left(2) 3616 .padding_right(2) 3617 .padding_top(2) 3618 .padding_bottom(2) 3619 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_4x4_acc2, xnn_init_f16_chw_params); 3620 } 3621 } 3622 } 3623 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 3624 3625 3626 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4,output_width_eq_4)3627 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4, output_width_eq_4) { 3628 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3629 DWConv2DMicrokernelTester() 3630 .input_width(4) 3631 .input_height(5) 3632 .kernel_height(5) 3633 .kernel_width(5) 3634 .subsampling(1) 3635 .padding_left(2) 3636 .padding_right(2) 3637 .padding_top(2) 3638 .padding_bottom(2) 3639 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4, xnn_init_f16_chw_params); 3640 } 3641 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4,output_width_div_4)3642 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4, output_width_div_4) { 3643 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3644 for (size_t input_width = 8; input_width < 32; input_width += 4) { 
3645 DWConv2DMicrokernelTester() 3646 .input_width(input_width) 3647 .input_height(5) 3648 .kernel_height(5) 3649 .kernel_width(5) 3650 .subsampling(1) 3651 .padding_left(2) 3652 .padding_right(2) 3653 .padding_top(2) 3654 .padding_bottom(2) 3655 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4, xnn_init_f16_chw_params); 3656 } 3657 } 3658 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4,output_width_lt_4)3659 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4, output_width_lt_4) { 3660 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3661 for (size_t input_width = 1; input_width < 4; input_width++) { 3662 DWConv2DMicrokernelTester() 3663 .input_width(4) 3664 .input_height(5) 3665 .kernel_height(5) 3666 .kernel_width(5) 3667 .subsampling(1) 3668 .padding_left(2) 3669 .padding_right(2) 3670 .padding_top(2) 3671 .padding_bottom(2) 3672 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4, xnn_init_f16_chw_params); 3673 } 3674 } 3675 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4,output_width_gt_4)3676 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4, output_width_gt_4) { 3677 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3678 for (size_t input_width = 5; input_width < 9; input_width++) { 3679 DWConv2DMicrokernelTester() 3680 .input_width(input_width) 3681 .input_height(5) 3682 .kernel_height(5) 3683 .kernel_width(5) 3684 .subsampling(1) 3685 .padding_left(2) 3686 .padding_right(2) 3687 .padding_top(2) 3688 .padding_bottom(2) 3689 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4, xnn_init_f16_chw_params); 3690 } 3691 } 3692 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4,output_height_div_5)3693 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4, output_height_div_5) { 3694 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3695 for (size_t input_height = 10; input_height < 40; input_height += 5) { 3696 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3697 DWConv2DMicrokernelTester() 3698 .input_width(input_width) 3699 .input_height(input_height) 3700 .kernel_height(5) 3701 
.kernel_width(5) 3702 .subsampling(1) 3703 .padding_left(2) 3704 .padding_right(2) 3705 .padding_top(2) 3706 .padding_bottom(2) 3707 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4, xnn_init_f16_chw_params); 3708 } 3709 } 3710 } 3711 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4,output_height_lt_5)3712 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4, output_height_lt_5) { 3713 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3714 for (size_t input_height = 1; input_height < 5; input_height++) { 3715 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3716 DWConv2DMicrokernelTester() 3717 .input_width(input_width) 3718 .input_height(input_height) 3719 .kernel_height(5) 3720 .kernel_width(5) 3721 .subsampling(1) 3722 .padding_left(2) 3723 .padding_right(2) 3724 .padding_top(2) 3725 .padding_bottom(2) 3726 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4, xnn_init_f16_chw_params); 3727 } 3728 } 3729 } 3730 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4,output_height_gt_5)3731 TEST(F16_DWCONV2D_CHW_5X5P2__NEONFP16ARITH_5X4, output_height_gt_5) { 3732 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3733 for (size_t input_height = 6; input_height < 11; input_height++) { 3734 for (size_t input_width = 1; input_width < 21; input_width += 3) { 3735 DWConv2DMicrokernelTester() 3736 .input_width(input_width) 3737 .input_height(input_height) 3738 .kernel_height(5) 3739 .kernel_width(5) 3740 .subsampling(1) 3741 .padding_left(2) 3742 .padding_right(2) 3743 .padding_top(2) 3744 .padding_bottom(2) 3745 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5p2__neonfp16arith_5x4, xnn_init_f16_chw_params); 3746 } 3747 } 3748 } 3749 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 3750 3751 3752 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4,output_width_eq_4)3753 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4, output_width_eq_4) { 3754 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3755 for (size_t input_width = 7; input_width < 9; input_width++) { 3756 
DWConv2DMicrokernelTester() 3757 .input_width(input_width) 3758 .input_height(2) 3759 .kernel_height(5) 3760 .kernel_width(5) 3761 .subsampling(2) 3762 .padding_left(2) 3763 .padding_right(2) 3764 .padding_top(2) 3765 .padding_bottom(2) 3766 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 3767 } 3768 } 3769 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4,output_width_div_4)3770 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4, output_width_div_4) { 3771 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3772 for (size_t input_width = 16; input_width < 64; input_width += 8) { 3773 DWConv2DMicrokernelTester() 3774 .input_width(input_width) 3775 .input_height(2) 3776 .kernel_height(5) 3777 .kernel_width(5) 3778 .subsampling(2) 3779 .padding_left(2) 3780 .padding_right(2) 3781 .padding_top(2) 3782 .padding_bottom(2) 3783 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 3784 } 3785 } 3786 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4,output_width_lt_4)3787 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4, output_width_lt_4) { 3788 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3789 for (size_t input_width = 1; input_width < 7; input_width++) { 3790 DWConv2DMicrokernelTester() 3791 .input_width(8) 3792 .input_height(2) 3793 .kernel_height(5) 3794 .kernel_width(5) 3795 .subsampling(2) 3796 .padding_left(2) 3797 .padding_right(2) 3798 .padding_top(2) 3799 .padding_bottom(2) 3800 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 3801 } 3802 } 3803 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4,output_width_gt_4)3804 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4, output_width_gt_4) { 3805 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3806 for (size_t input_width = 9; input_width < 17; input_width++) { 3807 DWConv2DMicrokernelTester() 3808 .input_width(input_width) 3809 .input_height(2) 3810 .kernel_height(5) 3811 .kernel_width(5) 3812 .subsampling(2) 3813 .padding_left(2) 3814 
.padding_right(2) 3815 .padding_top(2) 3816 .padding_bottom(2) 3817 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 3818 } 3819 } 3820 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4,output_height_eq_1)3821 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4, output_height_eq_1) { 3822 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3823 for (size_t input_height = 1; input_height < 3; input_height++) { 3824 for (size_t input_width = 1; input_width < 41; input_width += 7) { 3825 DWConv2DMicrokernelTester() 3826 .input_width(input_width) 3827 .input_height(input_height) 3828 .kernel_height(5) 3829 .kernel_width(5) 3830 .subsampling(2) 3831 .padding_left(2) 3832 .padding_right(2) 3833 .padding_top(2) 3834 .padding_bottom(2) 3835 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 3836 } 3837 } 3838 } 3839 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4,output_height_gt_1)3840 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4, output_height_gt_1) { 3841 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3842 for (size_t input_height = 3; input_height < 5; input_height++) { 3843 for (size_t input_width = 1; input_width < 41; input_width += 7) { 3844 DWConv2DMicrokernelTester() 3845 .input_width(input_width) 3846 .input_height(input_height) 3847 .kernel_height(5) 3848 .kernel_width(5) 3849 .subsampling(2) 3850 .padding_left(2) 3851 .padding_right(2) 3852 .padding_top(2) 3853 .padding_bottom(2) 3854 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 3855 } 3856 } 3857 } 3858 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4,padding_top_eq_1)3859 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4, padding_top_eq_1) { 3860 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3861 for (size_t input_height = 2; input_height < 8; input_height++) { 3862 for (size_t input_width = 1; input_width < 41; input_width += 7) { 3863 DWConv2DMicrokernelTester() 3864 .input_width(input_width) 3865 .input_height(input_height) 
3866 .kernel_height(5) 3867 .kernel_width(5) 3868 .subsampling(2) 3869 .padding_left(2) 3870 .padding_right(2) 3871 .padding_top(1) 3872 .padding_bottom(2) 3873 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4, xnn_init_f16_chw_params); 3874 } 3875 } 3876 } 3877 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 3878 3879 3880 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2,output_width_eq_4)3881 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2, output_width_eq_4) { 3882 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3883 for (size_t input_width = 7; input_width < 9; input_width++) { 3884 DWConv2DMicrokernelTester() 3885 .input_width(input_width) 3886 .input_height(2) 3887 .kernel_height(5) 3888 .kernel_width(5) 3889 .subsampling(2) 3890 .padding_left(2) 3891 .padding_right(2) 3892 .padding_top(2) 3893 .padding_bottom(2) 3894 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 3895 } 3896 } 3897 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2,output_width_div_4)3898 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2, output_width_div_4) { 3899 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3900 for (size_t input_width = 16; input_width < 64; input_width += 8) { 3901 DWConv2DMicrokernelTester() 3902 .input_width(input_width) 3903 .input_height(2) 3904 .kernel_height(5) 3905 .kernel_width(5) 3906 .subsampling(2) 3907 .padding_left(2) 3908 .padding_right(2) 3909 .padding_top(2) 3910 .padding_bottom(2) 3911 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 3912 } 3913 } 3914 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2,output_width_lt_4)3915 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2, output_width_lt_4) { 3916 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3917 for (size_t input_width = 1; input_width < 7; input_width++) { 3918 DWConv2DMicrokernelTester() 3919 .input_width(8) 3920 .input_height(2) 3921 .kernel_height(5) 3922 
.kernel_width(5) 3923 .subsampling(2) 3924 .padding_left(2) 3925 .padding_right(2) 3926 .padding_top(2) 3927 .padding_bottom(2) 3928 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 3929 } 3930 } 3931 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2,output_width_gt_4)3932 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2, output_width_gt_4) { 3933 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3934 for (size_t input_width = 9; input_width < 17; input_width++) { 3935 DWConv2DMicrokernelTester() 3936 .input_width(input_width) 3937 .input_height(2) 3938 .kernel_height(5) 3939 .kernel_width(5) 3940 .subsampling(2) 3941 .padding_left(2) 3942 .padding_right(2) 3943 .padding_top(2) 3944 .padding_bottom(2) 3945 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 3946 } 3947 } 3948 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2,output_height_eq_1)3949 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2, output_height_eq_1) { 3950 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3951 for (size_t input_height = 1; input_height < 3; input_height++) { 3952 for (size_t input_width = 1; input_width < 41; input_width += 7) { 3953 DWConv2DMicrokernelTester() 3954 .input_width(input_width) 3955 .input_height(input_height) 3956 .kernel_height(5) 3957 .kernel_width(5) 3958 .subsampling(2) 3959 .padding_left(2) 3960 .padding_right(2) 3961 .padding_top(2) 3962 .padding_bottom(2) 3963 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 3964 } 3965 } 3966 } 3967 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2,output_height_gt_1)3968 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2, output_height_gt_1) { 3969 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3970 for (size_t input_height = 3; input_height < 5; input_height++) { 3971 for (size_t input_width = 1; input_width < 41; input_width += 7) { 3972 DWConv2DMicrokernelTester() 3973 .input_width(input_width) 3974 
.input_height(input_height) 3975 .kernel_height(5) 3976 .kernel_width(5) 3977 .subsampling(2) 3978 .padding_left(2) 3979 .padding_right(2) 3980 .padding_top(2) 3981 .padding_bottom(2) 3982 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 3983 } 3984 } 3985 } 3986 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2,padding_top_eq_1)3987 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC2, padding_top_eq_1) { 3988 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3989 for (size_t input_height = 2; input_height < 8; input_height++) { 3990 for (size_t input_width = 1; input_width < 41; input_width += 7) { 3991 DWConv2DMicrokernelTester() 3992 .input_width(input_width) 3993 .input_height(input_height) 3994 .kernel_height(5) 3995 .kernel_width(5) 3996 .subsampling(2) 3997 .padding_left(2) 3998 .padding_right(2) 3999 .padding_top(1) 4000 .padding_bottom(2) 4001 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc2, xnn_init_f16_chw_params); 4002 } 4003 } 4004 } 4005 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 4006 4007 4008 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3,output_width_eq_4)4009 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3, output_width_eq_4) { 4010 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4011 for (size_t input_width = 7; input_width < 9; input_width++) { 4012 DWConv2DMicrokernelTester() 4013 .input_width(input_width) 4014 .input_height(2) 4015 .kernel_height(5) 4016 .kernel_width(5) 4017 .subsampling(2) 4018 .padding_left(2) 4019 .padding_right(2) 4020 .padding_top(2) 4021 .padding_bottom(2) 4022 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 4023 } 4024 } 4025 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3,output_width_div_4)4026 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3, output_width_div_4) { 4027 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4028 for (size_t input_width = 16; input_width < 64; 
input_width += 8) { 4029 DWConv2DMicrokernelTester() 4030 .input_width(input_width) 4031 .input_height(2) 4032 .kernel_height(5) 4033 .kernel_width(5) 4034 .subsampling(2) 4035 .padding_left(2) 4036 .padding_right(2) 4037 .padding_top(2) 4038 .padding_bottom(2) 4039 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 4040 } 4041 } 4042 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3,output_width_lt_4)4043 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3, output_width_lt_4) { 4044 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4045 for (size_t input_width = 1; input_width < 7; input_width++) { 4046 DWConv2DMicrokernelTester() 4047 .input_width(8) 4048 .input_height(2) 4049 .kernel_height(5) 4050 .kernel_width(5) 4051 .subsampling(2) 4052 .padding_left(2) 4053 .padding_right(2) 4054 .padding_top(2) 4055 .padding_bottom(2) 4056 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 4057 } 4058 } 4059 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3,output_width_gt_4)4060 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3, output_width_gt_4) { 4061 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4062 for (size_t input_width = 9; input_width < 17; input_width++) { 4063 DWConv2DMicrokernelTester() 4064 .input_width(input_width) 4065 .input_height(2) 4066 .kernel_height(5) 4067 .kernel_width(5) 4068 .subsampling(2) 4069 .padding_left(2) 4070 .padding_right(2) 4071 .padding_top(2) 4072 .padding_bottom(2) 4073 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 4074 } 4075 } 4076 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3,output_height_eq_1)4077 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3, output_height_eq_1) { 4078 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4079 for (size_t input_height = 1; input_height < 3; input_height++) { 4080 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4081 DWConv2DMicrokernelTester() 4082 
.input_width(input_width) 4083 .input_height(input_height) 4084 .kernel_height(5) 4085 .kernel_width(5) 4086 .subsampling(2) 4087 .padding_left(2) 4088 .padding_right(2) 4089 .padding_top(2) 4090 .padding_bottom(2) 4091 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 4092 } 4093 } 4094 } 4095 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3,output_height_gt_1)4096 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3, output_height_gt_1) { 4097 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4098 for (size_t input_height = 3; input_height < 5; input_height++) { 4099 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4100 DWConv2DMicrokernelTester() 4101 .input_width(input_width) 4102 .input_height(input_height) 4103 .kernel_height(5) 4104 .kernel_width(5) 4105 .subsampling(2) 4106 .padding_left(2) 4107 .padding_right(2) 4108 .padding_top(2) 4109 .padding_bottom(2) 4110 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 4111 } 4112 } 4113 } 4114 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3,padding_top_eq_1)4115 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC3, padding_top_eq_1) { 4116 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4117 for (size_t input_height = 2; input_height < 8; input_height++) { 4118 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4119 DWConv2DMicrokernelTester() 4120 .input_width(input_width) 4121 .input_height(input_height) 4122 .kernel_height(5) 4123 .kernel_width(5) 4124 .subsampling(2) 4125 .padding_left(2) 4126 .padding_right(2) 4127 .padding_top(1) 4128 .padding_bottom(2) 4129 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc3, xnn_init_f16_chw_params); 4130 } 4131 } 4132 } 4133 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 4134 4135 4136 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4,output_width_eq_4)4137 
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4, output_width_eq_4) { 4138 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4139 for (size_t input_width = 7; input_width < 9; input_width++) { 4140 DWConv2DMicrokernelTester() 4141 .input_width(input_width) 4142 .input_height(2) 4143 .kernel_height(5) 4144 .kernel_width(5) 4145 .subsampling(2) 4146 .padding_left(2) 4147 .padding_right(2) 4148 .padding_top(2) 4149 .padding_bottom(2) 4150 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 4151 } 4152 } 4153 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4,output_width_div_4)4154 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4, output_width_div_4) { 4155 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4156 for (size_t input_width = 16; input_width < 64; input_width += 8) { 4157 DWConv2DMicrokernelTester() 4158 .input_width(input_width) 4159 .input_height(2) 4160 .kernel_height(5) 4161 .kernel_width(5) 4162 .subsampling(2) 4163 .padding_left(2) 4164 .padding_right(2) 4165 .padding_top(2) 4166 .padding_bottom(2) 4167 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 4168 } 4169 } 4170 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4,output_width_lt_4)4171 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4, output_width_lt_4) { 4172 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4173 for (size_t input_width = 1; input_width < 7; input_width++) { 4174 DWConv2DMicrokernelTester() 4175 .input_width(8) 4176 .input_height(2) 4177 .kernel_height(5) 4178 .kernel_width(5) 4179 .subsampling(2) 4180 .padding_left(2) 4181 .padding_right(2) 4182 .padding_top(2) 4183 .padding_bottom(2) 4184 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 4185 } 4186 } 4187 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4,output_width_gt_4)4188 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4, output_width_gt_4) { 4189 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4190 for (size_t 
input_width = 9; input_width < 17; input_width++) { 4191 DWConv2DMicrokernelTester() 4192 .input_width(input_width) 4193 .input_height(2) 4194 .kernel_height(5) 4195 .kernel_width(5) 4196 .subsampling(2) 4197 .padding_left(2) 4198 .padding_right(2) 4199 .padding_top(2) 4200 .padding_bottom(2) 4201 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 4202 } 4203 } 4204 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4,output_height_eq_1)4205 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4, output_height_eq_1) { 4206 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4207 for (size_t input_height = 1; input_height < 3; input_height++) { 4208 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4209 DWConv2DMicrokernelTester() 4210 .input_width(input_width) 4211 .input_height(input_height) 4212 .kernel_height(5) 4213 .kernel_width(5) 4214 .subsampling(2) 4215 .padding_left(2) 4216 .padding_right(2) 4217 .padding_top(2) 4218 .padding_bottom(2) 4219 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 4220 } 4221 } 4222 } 4223 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4,output_height_gt_1)4224 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4, output_height_gt_1) { 4225 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4226 for (size_t input_height = 3; input_height < 5; input_height++) { 4227 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4228 DWConv2DMicrokernelTester() 4229 .input_width(input_width) 4230 .input_height(input_height) 4231 .kernel_height(5) 4232 .kernel_width(5) 4233 .subsampling(2) 4234 .padding_left(2) 4235 .padding_right(2) 4236 .padding_top(2) 4237 .padding_bottom(2) 4238 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 4239 } 4240 } 4241 } 4242 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4,padding_top_eq_1)4243 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC4, padding_top_eq_1) { 4244 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4245 for (size_t input_height = 2; input_height < 8; input_height++) { 4246 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4247 DWConv2DMicrokernelTester() 4248 .input_width(input_width) 4249 .input_height(input_height) 4250 .kernel_height(5) 4251 .kernel_width(5) 4252 .subsampling(2) 4253 .padding_left(2) 4254 .padding_right(2) 4255 .padding_top(1) 4256 .padding_bottom(2) 4257 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc4, xnn_init_f16_chw_params); 4258 } 4259 } 4260 } 4261 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 4262 4263 4264 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5,output_width_eq_4)4265 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5, output_width_eq_4) { 4266 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4267 for (size_t input_width = 7; input_width < 9; input_width++) { 4268 DWConv2DMicrokernelTester() 4269 .input_width(input_width) 4270 .input_height(2) 4271 .kernel_height(5) 4272 .kernel_width(5) 4273 .subsampling(2) 4274 .padding_left(2) 4275 .padding_right(2) 4276 .padding_top(2) 4277 .padding_bottom(2) 4278 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 4279 } 4280 } 4281 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5,output_width_div_4)4282 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5, output_width_div_4) { 4283 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4284 for (size_t input_width = 16; input_width < 64; input_width += 8) { 4285 DWConv2DMicrokernelTester() 4286 .input_width(input_width) 4287 .input_height(2) 4288 .kernel_height(5) 4289 .kernel_width(5) 4290 .subsampling(2) 4291 .padding_left(2) 4292 .padding_right(2) 4293 .padding_top(2) 4294 .padding_bottom(2) 4295 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 4296 } 4297 } 4298 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5,output_width_lt_4)4299 
// Sweeps input widths 1..6, i.e. the widths below the 7..8 range used by this
// kernel's output_width_eq_4 test.
// Fix: the tester previously received the constant 8 on every iteration, so the
// loop variable was unused and all six runs repeated a width that
// output_width_eq_4 already exercises. The loop variable is now passed through.
// NOTE: this file is generated (tools/generate-dwconv2d-chw-test.py per the
// file header); the same change belongs in the generator template, otherwise
// regeneration will revert it.
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5, output_width_lt_4) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t input_width = 1; input_width < 7; input_width++) {
    DWConv2DMicrokernelTester()
      .input_width(input_width)
      .input_height(2)
      .kernel_height(5)
      .kernel_width(5)
      .subsampling(2)
      .padding_left(2)
      .padding_right(2)
      .padding_top(2)
      .padding_bottom(2)
      .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params);
  }
}

// Input widths 9..16 on a 2-row input.
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5, output_width_gt_4) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t input_width = 9; input_width < 17; input_width++) {
    DWConv2DMicrokernelTester()
      .input_width(input_width)
      .input_height(2)
      .kernel_height(5)
      .kernel_width(5)
      .subsampling(2)
      .padding_left(2)
      .padding_right(2)
      .padding_top(2)
      .padding_bottom(2)
      .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params);
  }
}

// Input heights 1..2 crossed with input widths 1, 8, ..., 36.
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5, output_height_eq_1) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t input_height = 1; input_height < 3; input_height++) {
    for (size_t input_width = 1; input_width < 41; input_width += 7) {
      DWConv2DMicrokernelTester()
        .input_width(input_width)
        .input_height(input_height)
        .kernel_height(5)
        .kernel_width(5)
        .subsampling(2)
        .padding_left(2)
        .padding_right(2)
        .padding_top(2)
        .padding_bottom(2)
        .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params);
    }
  }
}
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5, output_height_gt_1) { 4353 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4354 for (size_t input_height = 3; input_height < 5; input_height++) { 4355 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4356 DWConv2DMicrokernelTester() 4357 .input_width(input_width) 4358 .input_height(input_height) 4359 .kernel_height(5) 4360 .kernel_width(5) 4361 .subsampling(2) 4362 .padding_left(2) 4363 .padding_right(2) 4364 .padding_top(2) 4365 .padding_bottom(2) 4366 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 4367 } 4368 } 4369 } 4370 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5,padding_top_eq_1)4371 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_1X4_ACC5, padding_top_eq_1) { 4372 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4373 for (size_t input_height = 2; input_height < 8; input_height++) { 4374 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4375 DWConv2DMicrokernelTester() 4376 .input_width(input_width) 4377 .input_height(input_height) 4378 .kernel_height(5) 4379 .kernel_width(5) 4380 .subsampling(2) 4381 .padding_left(2) 4382 .padding_right(2) 4383 .padding_top(1) 4384 .padding_bottom(2) 4385 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_1x4_acc5, xnn_init_f16_chw_params); 4386 } 4387 } 4388 } 4389 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 4390 4391 4392 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_width_eq_4)4393 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_width_eq_4) { 4394 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4395 for (size_t input_width = 7; input_width < 9; input_width++) { 4396 DWConv2DMicrokernelTester() 4397 .input_width(input_width) 4398 .input_height(4) 4399 .kernel_height(5) 4400 .kernel_width(5) 4401 .subsampling(2) 4402 .padding_left(2) 4403 .padding_right(2) 4404 .padding_top(2) 4405 .padding_bottom(2) 4406 
.Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4407 } 4408 } 4409 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_width_div_4)4410 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_width_div_4) { 4411 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4412 for (size_t input_width = 16; input_width < 64; input_width += 8) { 4413 DWConv2DMicrokernelTester() 4414 .input_width(input_width) 4415 .input_height(4) 4416 .kernel_height(5) 4417 .kernel_width(5) 4418 .subsampling(2) 4419 .padding_left(2) 4420 .padding_right(2) 4421 .padding_top(2) 4422 .padding_bottom(2) 4423 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4424 } 4425 } 4426 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_width_lt_4)4427 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_width_lt_4) { 4428 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4429 for (size_t input_width = 1; input_width < 7; input_width++) { 4430 DWConv2DMicrokernelTester() 4431 .input_width(8) 4432 .input_height(4) 4433 .kernel_height(5) 4434 .kernel_width(5) 4435 .subsampling(2) 4436 .padding_left(2) 4437 .padding_right(2) 4438 .padding_top(2) 4439 .padding_bottom(2) 4440 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4441 } 4442 } 4443 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_width_gt_4)4444 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_width_gt_4) { 4445 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4446 for (size_t input_width = 9; input_width < 17; input_width++) { 4447 DWConv2DMicrokernelTester() 4448 .input_width(input_width) 4449 .input_height(4) 4450 .kernel_height(5) 4451 .kernel_width(5) 4452 .subsampling(2) 4453 .padding_left(2) 4454 .padding_right(2) 4455 .padding_top(2) 4456 .padding_bottom(2) 4457 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4458 } 4459 } 4460 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_height_eq_2)4461 
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_height_eq_2) { 4462 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4463 for (size_t input_height = 3; input_height < 5; input_height++) { 4464 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4465 DWConv2DMicrokernelTester() 4466 .input_width(input_width) 4467 .input_height(input_height) 4468 .kernel_height(5) 4469 .kernel_width(5) 4470 .subsampling(2) 4471 .padding_left(2) 4472 .padding_right(2) 4473 .padding_top(2) 4474 .padding_bottom(2) 4475 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4476 } 4477 } 4478 } 4479 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_height_div_2)4480 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_height_div_2) { 4481 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4482 for (size_t input_height = 8; input_height < 32; input_height += 4) { 4483 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4484 DWConv2DMicrokernelTester() 4485 .input_width(input_width) 4486 .input_height(input_height) 4487 .kernel_height(5) 4488 .kernel_width(5) 4489 .subsampling(2) 4490 .padding_left(2) 4491 .padding_right(2) 4492 .padding_top(2) 4493 .padding_bottom(2) 4494 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4495 } 4496 } 4497 } 4498 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_height_lt_2)4499 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_height_lt_2) { 4500 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4501 for (size_t input_height = 1; input_height < 3; input_height++) { 4502 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4503 DWConv2DMicrokernelTester() 4504 .input_width(input_width) 4505 .input_height(input_height) 4506 .kernel_height(5) 4507 .kernel_width(5) 4508 .subsampling(2) 4509 .padding_left(2) 4510 .padding_right(2) 4511 .padding_top(2) 4512 .padding_bottom(2) 4513 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, 
xnn_init_f16_chw_params); 4514 } 4515 } 4516 } 4517 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,output_height_gt_2)4518 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, output_height_gt_2) { 4519 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4520 for (size_t input_height = 5; input_height < 9; input_height++) { 4521 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4522 DWConv2DMicrokernelTester() 4523 .input_width(input_width) 4524 .input_height(input_height) 4525 .kernel_height(5) 4526 .kernel_width(5) 4527 .subsampling(2) 4528 .padding_left(2) 4529 .padding_right(2) 4530 .padding_top(2) 4531 .padding_bottom(2) 4532 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4533 } 4534 } 4535 } 4536 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4,padding_top_eq_1)4537 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4, padding_top_eq_1) { 4538 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4539 for (size_t input_height = 2; input_height < 14; input_height++) { 4540 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4541 DWConv2DMicrokernelTester() 4542 .input_width(input_width) 4543 .input_height(input_height) 4544 .kernel_height(5) 4545 .kernel_width(5) 4546 .subsampling(2) 4547 .padding_left(2) 4548 .padding_right(2) 4549 .padding_top(1) 4550 .padding_bottom(2) 4551 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4, xnn_init_f16_chw_params); 4552 } 4553 } 4554 } 4555 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 4556 4557 4558 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_width_eq_4)4559 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_width_eq_4) { 4560 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4561 for (size_t input_width = 7; input_width < 9; input_width++) { 4562 DWConv2DMicrokernelTester() 4563 .input_width(input_width) 4564 .input_height(4) 4565 .kernel_height(5) 4566 .kernel_width(5) 4567 .subsampling(2) 4568 .padding_left(2) 4569 
.padding_right(2) 4570 .padding_top(2) 4571 .padding_bottom(2) 4572 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4573 } 4574 } 4575 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_width_div_4)4576 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_width_div_4) { 4577 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4578 for (size_t input_width = 16; input_width < 64; input_width += 8) { 4579 DWConv2DMicrokernelTester() 4580 .input_width(input_width) 4581 .input_height(4) 4582 .kernel_height(5) 4583 .kernel_width(5) 4584 .subsampling(2) 4585 .padding_left(2) 4586 .padding_right(2) 4587 .padding_top(2) 4588 .padding_bottom(2) 4589 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4590 } 4591 } 4592 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_width_lt_4)4593 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_width_lt_4) { 4594 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4595 for (size_t input_width = 1; input_width < 7; input_width++) { 4596 DWConv2DMicrokernelTester() 4597 .input_width(8) 4598 .input_height(4) 4599 .kernel_height(5) 4600 .kernel_width(5) 4601 .subsampling(2) 4602 .padding_left(2) 4603 .padding_right(2) 4604 .padding_top(2) 4605 .padding_bottom(2) 4606 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4607 } 4608 } 4609 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_width_gt_4)4610 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_width_gt_4) { 4611 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4612 for (size_t input_width = 9; input_width < 17; input_width++) { 4613 DWConv2DMicrokernelTester() 4614 .input_width(input_width) 4615 .input_height(4) 4616 .kernel_height(5) 4617 .kernel_width(5) 4618 .subsampling(2) 4619 .padding_left(2) 4620 .padding_right(2) 4621 .padding_top(2) 4622 .padding_bottom(2) 4623 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, 
xnn_init_f16_chw_params); 4624 } 4625 } 4626 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_height_eq_2)4627 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_height_eq_2) { 4628 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4629 for (size_t input_height = 3; input_height < 5; input_height++) { 4630 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4631 DWConv2DMicrokernelTester() 4632 .input_width(input_width) 4633 .input_height(input_height) 4634 .kernel_height(5) 4635 .kernel_width(5) 4636 .subsampling(2) 4637 .padding_left(2) 4638 .padding_right(2) 4639 .padding_top(2) 4640 .padding_bottom(2) 4641 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4642 } 4643 } 4644 } 4645 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_height_div_2)4646 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_height_div_2) { 4647 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4648 for (size_t input_height = 8; input_height < 32; input_height += 4) { 4649 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4650 DWConv2DMicrokernelTester() 4651 .input_width(input_width) 4652 .input_height(input_height) 4653 .kernel_height(5) 4654 .kernel_width(5) 4655 .subsampling(2) 4656 .padding_left(2) 4657 .padding_right(2) 4658 .padding_top(2) 4659 .padding_bottom(2) 4660 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4661 } 4662 } 4663 } 4664 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_height_lt_2)4665 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_height_lt_2) { 4666 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4667 for (size_t input_height = 1; input_height < 3; input_height++) { 4668 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4669 DWConv2DMicrokernelTester() 4670 .input_width(input_width) 4671 .input_height(input_height) 4672 .kernel_height(5) 4673 .kernel_width(5) 4674 .subsampling(2) 4675 
.padding_left(2) 4676 .padding_right(2) 4677 .padding_top(2) 4678 .padding_bottom(2) 4679 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4680 } 4681 } 4682 } 4683 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,output_height_gt_2)4684 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, output_height_gt_2) { 4685 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4686 for (size_t input_height = 5; input_height < 9; input_height++) { 4687 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4688 DWConv2DMicrokernelTester() 4689 .input_width(input_width) 4690 .input_height(input_height) 4691 .kernel_height(5) 4692 .kernel_width(5) 4693 .subsampling(2) 4694 .padding_left(2) 4695 .padding_right(2) 4696 .padding_top(2) 4697 .padding_bottom(2) 4698 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4699 } 4700 } 4701 } 4702 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2,padding_top_eq_1)4703 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC2, padding_top_eq_1) { 4704 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4705 for (size_t input_height = 2; input_height < 14; input_height++) { 4706 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4707 DWConv2DMicrokernelTester() 4708 .input_width(input_width) 4709 .input_height(input_height) 4710 .kernel_height(5) 4711 .kernel_width(5) 4712 .subsampling(2) 4713 .padding_left(2) 4714 .padding_right(2) 4715 .padding_top(1) 4716 .padding_bottom(2) 4717 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc2, xnn_init_f16_chw_params); 4718 } 4719 } 4720 } 4721 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 4722 4723 4724 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_width_eq_4)4725 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_width_eq_4) { 4726 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4727 for (size_t input_width = 7; input_width < 9; 
input_width++) { 4728 DWConv2DMicrokernelTester() 4729 .input_width(input_width) 4730 .input_height(4) 4731 .kernel_height(5) 4732 .kernel_width(5) 4733 .subsampling(2) 4734 .padding_left(2) 4735 .padding_right(2) 4736 .padding_top(2) 4737 .padding_bottom(2) 4738 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4739 } 4740 } 4741 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_width_div_4)4742 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_width_div_4) { 4743 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4744 for (size_t input_width = 16; input_width < 64; input_width += 8) { 4745 DWConv2DMicrokernelTester() 4746 .input_width(input_width) 4747 .input_height(4) 4748 .kernel_height(5) 4749 .kernel_width(5) 4750 .subsampling(2) 4751 .padding_left(2) 4752 .padding_right(2) 4753 .padding_top(2) 4754 .padding_bottom(2) 4755 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4756 } 4757 } 4758 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_width_lt_4)4759 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_width_lt_4) { 4760 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4761 for (size_t input_width = 1; input_width < 7; input_width++) { 4762 DWConv2DMicrokernelTester() 4763 .input_width(8) 4764 .input_height(4) 4765 .kernel_height(5) 4766 .kernel_width(5) 4767 .subsampling(2) 4768 .padding_left(2) 4769 .padding_right(2) 4770 .padding_top(2) 4771 .padding_bottom(2) 4772 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4773 } 4774 } 4775 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_width_gt_4)4776 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_width_gt_4) { 4777 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4778 for (size_t input_width = 9; input_width < 17; input_width++) { 4779 DWConv2DMicrokernelTester() 4780 .input_width(input_width) 4781 .input_height(4) 4782 .kernel_height(5) 4783 
.kernel_width(5) 4784 .subsampling(2) 4785 .padding_left(2) 4786 .padding_right(2) 4787 .padding_top(2) 4788 .padding_bottom(2) 4789 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4790 } 4791 } 4792 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_height_eq_2)4793 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_height_eq_2) { 4794 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4795 for (size_t input_height = 3; input_height < 5; input_height++) { 4796 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4797 DWConv2DMicrokernelTester() 4798 .input_width(input_width) 4799 .input_height(input_height) 4800 .kernel_height(5) 4801 .kernel_width(5) 4802 .subsampling(2) 4803 .padding_left(2) 4804 .padding_right(2) 4805 .padding_top(2) 4806 .padding_bottom(2) 4807 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4808 } 4809 } 4810 } 4811 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_height_div_2)4812 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_height_div_2) { 4813 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4814 for (size_t input_height = 8; input_height < 32; input_height += 4) { 4815 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4816 DWConv2DMicrokernelTester() 4817 .input_width(input_width) 4818 .input_height(input_height) 4819 .kernel_height(5) 4820 .kernel_width(5) 4821 .subsampling(2) 4822 .padding_left(2) 4823 .padding_right(2) 4824 .padding_top(2) 4825 .padding_bottom(2) 4826 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4827 } 4828 } 4829 } 4830 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_height_lt_2)4831 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_height_lt_2) { 4832 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4833 for (size_t input_height = 1; input_height < 3; input_height++) { 4834 for (size_t input_width = 1; input_width < 
41; input_width += 7) { 4835 DWConv2DMicrokernelTester() 4836 .input_width(input_width) 4837 .input_height(input_height) 4838 .kernel_height(5) 4839 .kernel_width(5) 4840 .subsampling(2) 4841 .padding_left(2) 4842 .padding_right(2) 4843 .padding_top(2) 4844 .padding_bottom(2) 4845 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4846 } 4847 } 4848 } 4849 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,output_height_gt_2)4850 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, output_height_gt_2) { 4851 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4852 for (size_t input_height = 5; input_height < 9; input_height++) { 4853 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4854 DWConv2DMicrokernelTester() 4855 .input_width(input_width) 4856 .input_height(input_height) 4857 .kernel_height(5) 4858 .kernel_width(5) 4859 .subsampling(2) 4860 .padding_left(2) 4861 .padding_right(2) 4862 .padding_top(2) 4863 .padding_bottom(2) 4864 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4865 } 4866 } 4867 } 4868 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3,padding_top_eq_1)4869 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_2X4_ACC3, padding_top_eq_1) { 4870 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4871 for (size_t input_height = 2; input_height < 14; input_height++) { 4872 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4873 DWConv2DMicrokernelTester() 4874 .input_width(input_width) 4875 .input_height(input_height) 4876 .kernel_height(5) 4877 .kernel_width(5) 4878 .subsampling(2) 4879 .padding_left(2) 4880 .padding_right(2) 4881 .padding_top(1) 4882 .padding_bottom(2) 4883 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_2x4_acc3, xnn_init_f16_chw_params); 4884 } 4885 } 4886 } 4887 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 4888 4889 4890 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_width_eq_4)4891 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_width_eq_4) { 4892 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4893 for (size_t input_width = 7; input_width < 9; input_width++) { 4894 DWConv2DMicrokernelTester() 4895 .input_width(input_width) 4896 .input_height(6) 4897 .kernel_height(5) 4898 .kernel_width(5) 4899 .subsampling(2) 4900 .padding_left(2) 4901 .padding_right(2) 4902 .padding_top(2) 4903 .padding_bottom(2) 4904 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 4905 } 4906 } 4907 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_width_div_4)4908 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_width_div_4) { 4909 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4910 for (size_t input_width = 16; input_width < 64; input_width += 8) { 4911 DWConv2DMicrokernelTester() 4912 .input_width(input_width) 4913 .input_height(6) 4914 .kernel_height(5) 4915 .kernel_width(5) 4916 .subsampling(2) 4917 .padding_left(2) 4918 .padding_right(2) 4919 .padding_top(2) 4920 .padding_bottom(2) 4921 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 4922 } 4923 } 4924 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_width_lt_4)4925 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_width_lt_4) { 4926 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4927 for (size_t input_width = 1; input_width < 7; input_width++) { 4928 DWConv2DMicrokernelTester() 4929 .input_width(8) 4930 .input_height(6) 4931 .kernel_height(5) 4932 .kernel_width(5) 4933 .subsampling(2) 4934 .padding_left(2) 4935 .padding_right(2) 4936 .padding_top(2) 4937 .padding_bottom(2) 4938 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 4939 } 4940 } 4941 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_width_gt_4)4942 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_width_gt_4) { 4943 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 
4944 for (size_t input_width = 9; input_width < 17; input_width++) { 4945 DWConv2DMicrokernelTester() 4946 .input_width(input_width) 4947 .input_height(6) 4948 .kernel_height(5) 4949 .kernel_width(5) 4950 .subsampling(2) 4951 .padding_left(2) 4952 .padding_right(2) 4953 .padding_top(2) 4954 .padding_bottom(2) 4955 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 4956 } 4957 } 4958 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_height_eq_3)4959 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_height_eq_3) { 4960 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4961 for (size_t input_height = 5; input_height < 7; input_height++) { 4962 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4963 DWConv2DMicrokernelTester() 4964 .input_width(input_width) 4965 .input_height(input_height) 4966 .kernel_height(5) 4967 .kernel_width(5) 4968 .subsampling(2) 4969 .padding_left(2) 4970 .padding_right(2) 4971 .padding_top(2) 4972 .padding_bottom(2) 4973 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 4974 } 4975 } 4976 } 4977 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_height_div_3)4978 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_height_div_3) { 4979 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4980 for (size_t input_height = 12; input_height < 48; input_height += 6) { 4981 for (size_t input_width = 1; input_width < 41; input_width += 7) { 4982 DWConv2DMicrokernelTester() 4983 .input_width(input_width) 4984 .input_height(input_height) 4985 .kernel_height(5) 4986 .kernel_width(5) 4987 .subsampling(2) 4988 .padding_left(2) 4989 .padding_right(2) 4990 .padding_top(2) 4991 .padding_bottom(2) 4992 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 4993 } 4994 } 4995 } 4996 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_height_lt_3)4997 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_height_lt_3) { 4998 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4999 for (size_t input_height = 1; input_height < 5; input_height++) { 5000 for (size_t input_width = 1; input_width < 41; input_width += 7) { 5001 DWConv2DMicrokernelTester() 5002 .input_width(input_width) 5003 .input_height(input_height) 5004 .kernel_height(5) 5005 .kernel_width(5) 5006 .subsampling(2) 5007 .padding_left(2) 5008 .padding_right(2) 5009 .padding_top(2) 5010 .padding_bottom(2) 5011 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 5012 } 5013 } 5014 } 5015 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,output_height_gt_3)5016 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, output_height_gt_3) { 5017 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5018 for (size_t input_height = 7; input_height < 13; input_height++) { 5019 for (size_t input_width = 1; input_width < 41; input_width += 7) { 5020 DWConv2DMicrokernelTester() 5021 .input_width(input_width) 5022 .input_height(input_height) 5023 .kernel_height(5) 5024 .kernel_width(5) 5025 .subsampling(2) 5026 .padding_left(2) 5027 .padding_right(2) 5028 .padding_top(2) 5029 .padding_bottom(2) 5030 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 5031 } 5032 } 5033 } 5034 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4,padding_top_eq_1)5035 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4, padding_top_eq_1) { 5036 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5037 for (size_t input_height = 2; input_height < 20; input_height++) { 5038 for (size_t input_width = 1; input_width < 41; input_width += 7) { 5039 DWConv2DMicrokernelTester() 5040 .input_width(input_width) 5041 .input_height(input_height) 5042 .kernel_height(5) 5043 .kernel_width(5) 5044 .subsampling(2) 5045 .padding_left(2) 5046 .padding_right(2) 5047 .padding_top(1) 5048 .padding_bottom(2) 5049 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4, xnn_init_f16_chw_params); 5050 } 5051 } 5052 } 5053 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 
5054 5055 5056 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2,output_width_eq_4)5057 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_width_eq_4) { 5058 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5059 for (size_t input_width = 7; input_width < 9; input_width++) { 5060 DWConv2DMicrokernelTester() 5061 .input_width(input_width) 5062 .input_height(6) 5063 .kernel_height(5) 5064 .kernel_width(5) 5065 .subsampling(2) 5066 .padding_left(2) 5067 .padding_right(2) 5068 .padding_top(2) 5069 .padding_bottom(2) 5070 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 5071 } 5072 } 5073 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2,output_width_div_4)5074 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_width_div_4) { 5075 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5076 for (size_t input_width = 16; input_width < 64; input_width += 8) { 5077 DWConv2DMicrokernelTester() 5078 .input_width(input_width) 5079 .input_height(6) 5080 .kernel_height(5) 5081 .kernel_width(5) 5082 .subsampling(2) 5083 .padding_left(2) 5084 .padding_right(2) 5085 .padding_top(2) 5086 .padding_bottom(2) 5087 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 5088 } 5089 } 5090 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2,output_width_lt_4)5091 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_width_lt_4) { 5092 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5093 for (size_t input_width = 1; input_width < 7; input_width++) { 5094 DWConv2DMicrokernelTester() 5095 .input_width(8) 5096 .input_height(6) 5097 .kernel_height(5) 5098 .kernel_width(5) 5099 .subsampling(2) 5100 .padding_left(2) 5101 .padding_right(2) 5102 .padding_top(2) 5103 .padding_bottom(2) 5104 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 5105 } 5106 } 5107 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2,output_width_gt_4)5108 
// Sweeps input widths 9..16 at a fixed input height of 6
// (5x5 kernel, subsampling 2, padding 2 on every side).
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_width_gt_4) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t input_width = 9; input_width < 17; input_width++) {
    DWConv2DMicrokernelTester()
      .input_width(input_width)
      .input_height(6)
      .kernel_height(5)
      .kernel_width(5)
      .subsampling(2)
      .padding_left(2)
      .padding_right(2)
      .padding_top(2)
      .padding_bottom(2)
      .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params);
  }
}

// Input heights 5..6 crossed with input widths 1, 8, ..., 36.
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_height_eq_3) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t input_height = 5; input_height < 7; input_height++) {
    for (size_t input_width = 1; input_width < 41; input_width += 7) {
      DWConv2DMicrokernelTester()
        .input_width(input_width)
        .input_height(input_height)
        .kernel_height(5)
        .kernel_width(5)
        .subsampling(2)
        .padding_left(2)
        .padding_right(2)
        .padding_top(2)
        .padding_bottom(2)
        .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params);
    }
  }
}

// Input heights 12, 18, ..., 42 crossed with input widths 1, 8, ..., 36.
TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_height_div_3) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t input_height = 12; input_height < 48; input_height += 6) {
    for (size_t input_width = 1; input_width < 41; input_width += 7) {
      DWConv2DMicrokernelTester()
        .input_width(input_width)
        .input_height(input_height)
        .kernel_height(5)
        .kernel_width(5)
        .subsampling(2)
        .padding_left(2)
        .padding_right(2)
        .padding_top(2)
        .padding_bottom(2)
        .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params);
    }
  }
}

TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2,output_height_lt_3)5163 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_height_lt_3) { 5164 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5165 for (size_t input_height = 1; input_height < 5; input_height++) { 5166 for (size_t input_width = 1; input_width < 41; input_width += 7) { 5167 DWConv2DMicrokernelTester() 5168 .input_width(input_width) 5169 .input_height(input_height) 5170 .kernel_height(5) 5171 .kernel_width(5) 5172 .subsampling(2) 5173 .padding_left(2) 5174 .padding_right(2) 5175 .padding_top(2) 5176 .padding_bottom(2) 5177 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 5178 } 5179 } 5180 } 5181 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2,output_height_gt_3)5182 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, output_height_gt_3) { 5183 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5184 for (size_t input_height = 7; input_height < 13; input_height++) { 5185 for (size_t input_width = 1; input_width < 41; input_width += 7) { 5186 DWConv2DMicrokernelTester() 5187 .input_width(input_width) 5188 .input_height(input_height) 5189 .kernel_height(5) 5190 .kernel_width(5) 5191 .subsampling(2) 5192 .padding_left(2) 5193 .padding_right(2) 5194 .padding_top(2) 5195 .padding_bottom(2) 5196 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 5197 } 5198 } 5199 } 5200 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2,padding_top_eq_1)5201 TEST(F16_DWCONV2D_CHW_5X5S2P2__NEONFP16ARITH_3X4_ACC2, padding_top_eq_1) { 5202 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 5203 for (size_t input_height = 2; input_height < 20; input_height++) { 5204 for (size_t input_width = 1; input_width < 41; input_width += 7) { 5205 DWConv2DMicrokernelTester() 5206 .input_width(input_width) 5207 .input_height(input_height) 5208 .kernel_height(5) 5209 .kernel_width(5) 5210 .subsampling(2) 5211 .padding_left(2) 5212 .padding_right(2) 5213 .padding_top(1) 
5214 .padding_bottom(2) 5215 .Test(xnn_f16_dwconv2d_chw_ukernel_5x5s2p2__neonfp16arith_3x4_acc2, xnn_init_f16_chw_params); 5216 } 5217 } 5218 } 5219 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 5220