1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/f16-dwconv-minmax.yaml 11 // Generator: tools/generate-dwconv-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/common.h> 17 #include <xnnpack/isa-checks.h> 18 19 #include <xnnpack/dwconv.h> 20 #include "dwconv-microkernel-tester.h" 21 22 23 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_eq_8)24 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_eq_8) { 25 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 26 DWConvMicrokernelTester() 27 .cr(8) 28 .kr(3) 29 .channels(8) 30 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 31 } 32 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_div_8)33 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_div_8) { 34 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 35 for (uint32_t channels = 16; channels < 128; channels += 24) { 36 DWConvMicrokernelTester() 37 .cr(8) 38 .kr(3) 39 .channels(channels) 40 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 41 } 42 } 43 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_div_8_with_qmin)44 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_div_8_with_qmin) { 45 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 46 for (uint32_t channels = 16; channels < 128; channels += 24) { 47 DWConvMicrokernelTester() 48 .cr(8) 49 .kr(3) 50 .channels(channels) 51 .qmin(128) 52 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 53 } 54 } 55 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_div_8_with_qmax)56 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_div_8_with_qmax) { 57 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 58 for (uint32_t channels = 16; channels < 
128; channels += 24) { 59 DWConvMicrokernelTester() 60 .cr(8) 61 .kr(3) 62 .channels(channels) 63 .qmax(128) 64 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 65 } 66 } 67 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_lt_8)68 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_lt_8) { 69 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 70 for (uint32_t channels = 1; channels < 8; channels++) { 71 DWConvMicrokernelTester() 72 .cr(8) 73 .kr(3) 74 .channels(channels) 75 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 76 } 77 } 78 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_gt_8)79 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_gt_8) { 80 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 81 for (uint32_t channels = 9; channels < 16; channels++) { 82 DWConvMicrokernelTester() 83 .cr(8) 84 .kr(3) 85 .channels(channels) 86 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 87 } 88 } 89 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_gt_8_with_qmin)90 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_gt_8_with_qmin) { 91 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 92 for (uint32_t channels = 9; channels < 16; channels++) { 93 DWConvMicrokernelTester() 94 .cr(8) 95 .kr(3) 96 .channels(channels) 97 .qmin(128) 98 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 99 } 100 } 101 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,c_gt_8_with_qmax)102 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, c_gt_8_with_qmax) { 103 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 104 for (uint32_t channels = 9; channels < 16; channels++) { 105 DWConvMicrokernelTester() 106 .cr(8) 107 .kr(3) 108 .channels(channels) 109 .qmax(128) 110 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 111 } 112 } 113 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,multipixel)114 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel) { 115 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 116 for (size_t channels = 1; channels <= 40; channels += 7) { 117 DWConvMicrokernelTester() 118 .cr(8) 119 .kr(3) 120 .channels(channels) 121 .width(3) 122 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 123 } 124 } 125 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,multipixel_with_step)126 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_step) { 127 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 128 for (size_t channels = 1; channels <= 40; channels += 7) { 129 for (size_t step = 2; step <= 3; step++) { 130 DWConvMicrokernelTester() 131 .cr(8) 132 .kr(3) 133 .channels(channels) 134 .width(3) 135 .step(step) 136 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 137 } 138 } 139 } 140 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,multipixel_with_output_stride)141 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_output_stride) { 142 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 143 for (size_t channels = 1; channels <= 40; channels += 7) { 144 DWConvMicrokernelTester() 145 .cr(8) 146 .kr(3) 147 .channels(8) 148 .width(5) 149 .output_stride(43) 150 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 151 } 152 } 153 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,multipixel_with_qmin)154 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_qmin) { 155 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 156 for (size_t channels = 1; channels <= 40; channels += 7) { 157 DWConvMicrokernelTester() 158 .cr(8) 159 .kr(3) 160 .channels(channels) 161 .width(3) 162 .qmin(128) 163 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 164 } 165 } 166 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,multipixel_with_qmax)167 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, multipixel_with_qmax) { 168 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 169 for (size_t channels = 1; channels <= 40; channels += 7) { 170 
DWConvMicrokernelTester() 171 .cr(8) 172 .kr(3) 173 .channels(channels) 174 .width(3) 175 .qmax(128) 176 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 177 } 178 } 179 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,input_offset)180 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, input_offset) { 181 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 182 for (uint32_t channels = 16; channels < 128; channels += 24) { 183 DWConvMicrokernelTester() 184 .cr(8) 185 .kr(3) 186 .channels(channels) 187 .input_offset(176) 188 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 189 } 190 } 191 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH,zero)192 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH, zero) { 193 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 194 for (uint32_t mz = 0; mz < 3; mz++) { 195 for (uint32_t channels = 16; channels < 128; channels += 24) { 196 DWConvMicrokernelTester() 197 .cr(8) 198 .kr(3) 199 .channels(channels) 200 .input_offset(176) 201 .zero_index(mz) 202 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 203 } 204 } 205 } 206 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 207 208 209 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_eq_8)210 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_eq_8) { 211 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 212 DWConvMicrokernelTester() 213 .cr(8) 214 .kr(3) 215 .channels(8) 216 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 217 } 218 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_div_8)219 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_div_8) { 220 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 221 for (uint32_t channels = 16; channels < 128; channels += 24) { 222 DWConvMicrokernelTester() 223 .cr(8) 224 .kr(3) 225 .channels(channels) 226 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, 
xnn_init_f16_minmax_neon_params); 227 } 228 } 229 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_div_8_with_qmin)230 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_div_8_with_qmin) { 231 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 232 for (uint32_t channels = 16; channels < 128; channels += 24) { 233 DWConvMicrokernelTester() 234 .cr(8) 235 .kr(3) 236 .channels(channels) 237 .qmin(128) 238 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 239 } 240 } 241 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_div_8_with_qmax)242 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_div_8_with_qmax) { 243 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 244 for (uint32_t channels = 16; channels < 128; channels += 24) { 245 DWConvMicrokernelTester() 246 .cr(8) 247 .kr(3) 248 .channels(channels) 249 .qmax(128) 250 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 251 } 252 } 253 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_lt_8)254 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_lt_8) { 255 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 256 for (uint32_t channels = 1; channels < 8; channels++) { 257 DWConvMicrokernelTester() 258 .cr(8) 259 .kr(3) 260 .channels(channels) 261 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 262 } 263 } 264 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_gt_8)265 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_gt_8) { 266 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 267 for (uint32_t channels = 9; channels < 16; channels++) { 268 DWConvMicrokernelTester() 269 .cr(8) 270 .kr(3) 271 .channels(channels) 272 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 273 } 274 } 275 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_gt_8_with_qmin)276 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) { 277 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 278 for (uint32_t channels = 9; 
channels < 16; channels++) { 279 DWConvMicrokernelTester() 280 .cr(8) 281 .kr(3) 282 .channels(channels) 283 .qmin(128) 284 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 285 } 286 } 287 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,c_gt_8_with_qmax)288 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) { 289 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 290 for (uint32_t channels = 9; channels < 16; channels++) { 291 DWConvMicrokernelTester() 292 .cr(8) 293 .kr(3) 294 .channels(channels) 295 .qmax(128) 296 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 297 } 298 } 299 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,multipixel)300 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel) { 301 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 302 for (size_t channels = 1; channels <= 40; channels += 7) { 303 DWConvMicrokernelTester() 304 .cr(8) 305 .kr(3) 306 .channels(channels) 307 .width(3) 308 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 309 } 310 } 311 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,multipixel_with_step)312 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_step) { 313 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 314 for (size_t channels = 1; channels <= 40; channels += 7) { 315 for (size_t step = 2; step <= 3; step++) { 316 DWConvMicrokernelTester() 317 .cr(8) 318 .kr(3) 319 .channels(channels) 320 .width(3) 321 .step(step) 322 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 323 } 324 } 325 } 326 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,multipixel_with_output_stride)327 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 328 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 329 for (size_t channels = 1; channels <= 40; channels += 7) { 330 DWConvMicrokernelTester() 331 .cr(8) 332 .kr(3) 333 .channels(8) 334 .width(5) 
335 .output_stride(43) 336 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 337 } 338 } 339 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,multipixel_with_qmin)340 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 341 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 342 for (size_t channels = 1; channels <= 40; channels += 7) { 343 DWConvMicrokernelTester() 344 .cr(8) 345 .kr(3) 346 .channels(channels) 347 .width(3) 348 .qmin(128) 349 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 350 } 351 } 352 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,multipixel_with_qmax)353 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 354 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 355 for (size_t channels = 1; channels <= 40; channels += 7) { 356 DWConvMicrokernelTester() 357 .cr(8) 358 .kr(3) 359 .channels(channels) 360 .width(3) 361 .qmax(128) 362 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 363 } 364 } 365 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,input_offset)366 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, input_offset) { 367 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 368 for (uint32_t channels = 16; channels < 128; channels += 24) { 369 DWConvMicrokernelTester() 370 .cr(8) 371 .kr(3) 372 .channels(channels) 373 .input_offset(176) 374 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 375 } 376 } 377 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2,zero)378 TEST(F16_DWCONV_MINMAX_UP8X3__NEONFP16ARITH_ACC2, zero) { 379 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 380 for (uint32_t mz = 0; mz < 3; mz++) { 381 for (uint32_t channels = 16; channels < 128; channels += 24) { 382 DWConvMicrokernelTester() 383 .cr(8) 384 .kr(3) 385 .channels(channels) 386 .input_offset(176) 387 .zero_index(mz) 388 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__neonfp16arith_acc2, 
xnn_init_f16_minmax_neon_params); 389 } 390 } 391 } 392 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 393 394 395 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_eq_8)396 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_eq_8) { 397 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 398 DWConvMicrokernelTester() 399 .cr(8) 400 .kr(4) 401 .channels(8) 402 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 403 } 404 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_div_8)405 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8) { 406 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 407 for (uint32_t channels = 16; channels < 128; channels += 24) { 408 DWConvMicrokernelTester() 409 .cr(8) 410 .kr(4) 411 .channels(channels) 412 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 413 } 414 } 415 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_div_8_with_qmin)416 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmin) { 417 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 418 for (uint32_t channels = 16; channels < 128; channels += 24) { 419 DWConvMicrokernelTester() 420 .cr(8) 421 .kr(4) 422 .channels(channels) 423 .qmin(128) 424 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 425 } 426 } 427 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_div_8_with_qmax)428 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmax) { 429 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 430 for (uint32_t channels = 16; channels < 128; channels += 24) { 431 DWConvMicrokernelTester() 432 .cr(8) 433 .kr(4) 434 .channels(channels) 435 .qmax(128) 436 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 437 } 438 } 439 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_lt_8)440 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_lt_8) { 441 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 442 for (uint32_t channels = 1; channels < 
8; channels++) { 443 DWConvMicrokernelTester() 444 .cr(8) 445 .kr(4) 446 .channels(channels) 447 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 448 } 449 } 450 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_gt_8)451 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8) { 452 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 453 for (uint32_t channels = 9; channels < 16; channels++) { 454 DWConvMicrokernelTester() 455 .cr(8) 456 .kr(4) 457 .channels(channels) 458 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 459 } 460 } 461 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_gt_8_with_qmin)462 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmin) { 463 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 464 for (uint32_t channels = 9; channels < 16; channels++) { 465 DWConvMicrokernelTester() 466 .cr(8) 467 .kr(4) 468 .channels(channels) 469 .qmin(128) 470 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 471 } 472 } 473 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_gt_8_with_qmax)474 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmax) { 475 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 476 for (uint32_t channels = 9; channels < 16; channels++) { 477 DWConvMicrokernelTester() 478 .cr(8) 479 .kr(4) 480 .channels(channels) 481 .qmax(128) 482 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 483 } 484 } 485 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel)486 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel) { 487 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 488 for (size_t channels = 1; channels <= 40; channels += 7) { 489 DWConvMicrokernelTester() 490 .cr(8) 491 .kr(4) 492 .channels(channels) 493 .width(3) 494 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 495 } 496 } 497 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_step)498 
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_step) { 499 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 500 for (size_t channels = 1; channels <= 40; channels += 7) { 501 for (size_t step = 2; step <= 4; step++) { 502 DWConvMicrokernelTester() 503 .cr(8) 504 .kr(4) 505 .channels(channels) 506 .width(3) 507 .step(step) 508 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 509 } 510 } 511 } 512 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_output_stride)513 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_output_stride) { 514 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 515 for (size_t channels = 1; channels <= 40; channels += 7) { 516 DWConvMicrokernelTester() 517 .cr(8) 518 .kr(4) 519 .channels(8) 520 .width(5) 521 .output_stride(43) 522 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 523 } 524 } 525 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_qmin)526 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmin) { 527 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 528 for (size_t channels = 1; channels <= 40; channels += 7) { 529 DWConvMicrokernelTester() 530 .cr(8) 531 .kr(4) 532 .channels(channels) 533 .width(3) 534 .qmin(128) 535 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 536 } 537 } 538 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_qmax)539 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmax) { 540 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 541 for (size_t channels = 1; channels <= 40; channels += 7) { 542 DWConvMicrokernelTester() 543 .cr(8) 544 .kr(4) 545 .channels(channels) 546 .width(3) 547 .qmax(128) 548 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 549 } 550 } 551 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,input_offset)552 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, input_offset) { 553 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 554 
for (uint32_t channels = 16; channels < 128; channels += 24) { 555 DWConvMicrokernelTester() 556 .cr(8) 557 .kr(4) 558 .channels(channels) 559 .input_offset(176) 560 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 561 } 562 } 563 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,zero)564 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, zero) { 565 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 566 for (uint32_t mz = 0; mz < 4; mz++) { 567 for (uint32_t channels = 16; channels < 128; channels += 24) { 568 DWConvMicrokernelTester() 569 .cr(8) 570 .kr(4) 571 .channels(channels) 572 .input_offset(176) 573 .zero_index(mz) 574 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 575 } 576 } 577 } 578 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 579 580 581 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_eq_8)582 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_eq_8) { 583 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 584 DWConvMicrokernelTester() 585 .cr(8) 586 .kr(4) 587 .channels(8) 588 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 589 } 590 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_div_8)591 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8) { 592 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 593 for (uint32_t channels = 16; channels < 128; channels += 24) { 594 DWConvMicrokernelTester() 595 .cr(8) 596 .kr(4) 597 .channels(channels) 598 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 599 } 600 } 601 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_div_8_with_qmin)602 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmin) { 603 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 604 for (uint32_t channels = 16; channels < 128; channels += 24) { 605 DWConvMicrokernelTester() 606 .cr(8) 607 .kr(4) 608 .channels(channels) 609 
.qmin(128) 610 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 611 } 612 } 613 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_div_8_with_qmax)614 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmax) { 615 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 616 for (uint32_t channels = 16; channels < 128; channels += 24) { 617 DWConvMicrokernelTester() 618 .cr(8) 619 .kr(4) 620 .channels(channels) 621 .qmax(128) 622 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 623 } 624 } 625 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_lt_8)626 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_lt_8) { 627 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 628 for (uint32_t channels = 1; channels < 8; channels++) { 629 DWConvMicrokernelTester() 630 .cr(8) 631 .kr(4) 632 .channels(channels) 633 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 634 } 635 } 636 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_gt_8)637 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8) { 638 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 639 for (uint32_t channels = 9; channels < 16; channels++) { 640 DWConvMicrokernelTester() 641 .cr(8) 642 .kr(4) 643 .channels(channels) 644 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 645 } 646 } 647 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_gt_8_with_qmin)648 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) { 649 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 650 for (uint32_t channels = 9; channels < 16; channels++) { 651 DWConvMicrokernelTester() 652 .cr(8) 653 .kr(4) 654 .channels(channels) 655 .qmin(128) 656 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 657 } 658 } 659 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_gt_8_with_qmax)660 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) { 
661 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 662 for (uint32_t channels = 9; channels < 16; channels++) { 663 DWConvMicrokernelTester() 664 .cr(8) 665 .kr(4) 666 .channels(channels) 667 .qmax(128) 668 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 669 } 670 } 671 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,multipixel)672 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel) { 673 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 674 for (size_t channels = 1; channels <= 40; channels += 7) { 675 DWConvMicrokernelTester() 676 .cr(8) 677 .kr(4) 678 .channels(channels) 679 .width(3) 680 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 681 } 682 } 683 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,multipixel_with_step)684 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_step) { 685 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 686 for (size_t channels = 1; channels <= 40; channels += 7) { 687 for (size_t step = 2; step <= 4; step++) { 688 DWConvMicrokernelTester() 689 .cr(8) 690 .kr(4) 691 .channels(channels) 692 .width(3) 693 .step(step) 694 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 695 } 696 } 697 } 698 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,multipixel_with_output_stride)699 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 700 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 701 for (size_t channels = 1; channels <= 40; channels += 7) { 702 DWConvMicrokernelTester() 703 .cr(8) 704 .kr(4) 705 .channels(8) 706 .width(5) 707 .output_stride(43) 708 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 709 } 710 } 711 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,multipixel_with_qmin)712 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 713 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 714 for (size_t channels = 1; channels <= 40; 
channels += 7) { 715 DWConvMicrokernelTester() 716 .cr(8) 717 .kr(4) 718 .channels(channels) 719 .width(3) 720 .qmin(128) 721 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 722 } 723 } 724 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,multipixel_with_qmax)725 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 726 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 727 for (size_t channels = 1; channels <= 40; channels += 7) { 728 DWConvMicrokernelTester() 729 .cr(8) 730 .kr(4) 731 .channels(channels) 732 .width(3) 733 .qmax(128) 734 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 735 } 736 } 737 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,input_offset)738 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, input_offset) { 739 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 740 for (uint32_t channels = 16; channels < 128; channels += 24) { 741 DWConvMicrokernelTester() 742 .cr(8) 743 .kr(4) 744 .channels(channels) 745 .input_offset(176) 746 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 747 } 748 } 749 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,zero)750 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, zero) { 751 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 752 for (uint32_t mz = 0; mz < 4; mz++) { 753 for (uint32_t channels = 16; channels < 128; channels += 24) { 754 DWConvMicrokernelTester() 755 .cr(8) 756 .kr(4) 757 .channels(channels) 758 .input_offset(176) 759 .zero_index(mz) 760 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 761 } 762 } 763 } 764 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 765 766 767 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_eq_8)768 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_eq_8) { 769 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 770 DWConvMicrokernelTester() 771 .cr(8) 
772 .kr(9) 773 .channels(8) 774 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 775 } 776 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_div_8)777 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8) { 778 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 779 for (uint32_t channels = 16; channels < 128; channels += 24) { 780 DWConvMicrokernelTester() 781 .cr(8) 782 .kr(9) 783 .channels(channels) 784 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 785 } 786 } 787 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_div_8_with_qmin)788 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmin) { 789 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 790 for (uint32_t channels = 16; channels < 128; channels += 24) { 791 DWConvMicrokernelTester() 792 .cr(8) 793 .kr(9) 794 .channels(channels) 795 .qmin(128) 796 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 797 } 798 } 799 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_div_8_with_qmax)800 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmax) { 801 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 802 for (uint32_t channels = 16; channels < 128; channels += 24) { 803 DWConvMicrokernelTester() 804 .cr(8) 805 .kr(9) 806 .channels(channels) 807 .qmax(128) 808 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 809 } 810 } 811 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_lt_8)812 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_lt_8) { 813 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 814 for (uint32_t channels = 1; channels < 8; channels++) { 815 DWConvMicrokernelTester() 816 .cr(8) 817 .kr(9) 818 .channels(channels) 819 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 820 } 821 } 822 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_gt_8)823 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8) { 824 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 825 for (uint32_t channels = 9; 
channels < 16; channels++) { 826 DWConvMicrokernelTester() 827 .cr(8) 828 .kr(9) 829 .channels(channels) 830 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 831 } 832 } 833 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_gt_8_with_qmin)834 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmin) { 835 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 836 for (uint32_t channels = 9; channels < 16; channels++) { 837 DWConvMicrokernelTester() 838 .cr(8) 839 .kr(9) 840 .channels(channels) 841 .qmin(128) 842 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 843 } 844 } 845 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_gt_8_with_qmax)846 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmax) { 847 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 848 for (uint32_t channels = 9; channels < 16; channels++) { 849 DWConvMicrokernelTester() 850 .cr(8) 851 .kr(9) 852 .channels(channels) 853 .qmax(128) 854 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 855 } 856 } 857 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel)858 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel) { 859 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 860 for (size_t channels = 1; channels <= 40; channels += 7) { 861 DWConvMicrokernelTester() 862 .cr(8) 863 .kr(9) 864 .channels(channels) 865 .width(3) 866 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 867 } 868 } 869 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_step)870 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_step) { 871 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 872 for (size_t channels = 1; channels <= 40; channels += 7) { 873 for (size_t step = 2; step <= 9; step++) { 874 DWConvMicrokernelTester() 875 .cr(8) 876 .kr(9) 877 .channels(channels) 878 .width(3) 879 .step(step) 880 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 881 } 
882 } 883 } 884 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_output_stride)885 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_output_stride) { 886 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 887 for (size_t channels = 1; channels <= 40; channels += 7) { 888 DWConvMicrokernelTester() 889 .cr(8) 890 .kr(9) 891 .channels(8) 892 .width(5) 893 .output_stride(43) 894 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 895 } 896 } 897 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_qmin)898 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmin) { 899 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 900 for (size_t channels = 1; channels <= 40; channels += 7) { 901 DWConvMicrokernelTester() 902 .cr(8) 903 .kr(9) 904 .channels(channels) 905 .width(3) 906 .qmin(128) 907 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 908 } 909 } 910 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_qmax)911 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmax) { 912 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 913 for (size_t channels = 1; channels <= 40; channels += 7) { 914 DWConvMicrokernelTester() 915 .cr(8) 916 .kr(9) 917 .channels(channels) 918 .width(3) 919 .qmax(128) 920 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 921 } 922 } 923 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,input_offset)924 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, input_offset) { 925 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 926 for (uint32_t channels = 16; channels < 128; channels += 24) { 927 DWConvMicrokernelTester() 928 .cr(8) 929 .kr(9) 930 .channels(channels) 931 .input_offset(176) 932 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 933 } 934 } 935 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,zero)936 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, zero) { 937 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 938 for 
(uint32_t mz = 0; mz < 9; mz++) { 939 for (uint32_t channels = 16; channels < 128; channels += 24) { 940 DWConvMicrokernelTester() 941 .cr(8) 942 .kr(9) 943 .channels(channels) 944 .input_offset(176) 945 .zero_index(mz) 946 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 947 } 948 } 949 } 950 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 951 952 953 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_eq_8)954 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_eq_8) { 955 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 956 DWConvMicrokernelTester() 957 .cr(8) 958 .kr(9) 959 .channels(8) 960 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 961 } 962 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_div_8)963 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8) { 964 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 965 for (uint32_t channels = 16; channels < 128; channels += 24) { 966 DWConvMicrokernelTester() 967 .cr(8) 968 .kr(9) 969 .channels(channels) 970 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 971 } 972 } 973 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_div_8_with_qmin)974 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmin) { 975 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 976 for (uint32_t channels = 16; channels < 128; channels += 24) { 977 DWConvMicrokernelTester() 978 .cr(8) 979 .kr(9) 980 .channels(channels) 981 .qmin(128) 982 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 983 } 984 } 985 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_div_8_with_qmax)986 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmax) { 987 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 988 for (uint32_t channels = 16; channels < 128; channels += 24) { 989 DWConvMicrokernelTester() 990 .cr(8) 991 .kr(9) 992 
.channels(channels) 993 .qmax(128) 994 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 995 } 996 } 997 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_lt_8)998 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_lt_8) { 999 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1000 for (uint32_t channels = 1; channels < 8; channels++) { 1001 DWConvMicrokernelTester() 1002 .cr(8) 1003 .kr(9) 1004 .channels(channels) 1005 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1006 } 1007 } 1008 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_gt_8)1009 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8) { 1010 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1011 for (uint32_t channels = 9; channels < 16; channels++) { 1012 DWConvMicrokernelTester() 1013 .cr(8) 1014 .kr(9) 1015 .channels(channels) 1016 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1017 } 1018 } 1019 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_gt_8_with_qmin)1020 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) { 1021 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1022 for (uint32_t channels = 9; channels < 16; channels++) { 1023 DWConvMicrokernelTester() 1024 .cr(8) 1025 .kr(9) 1026 .channels(channels) 1027 .qmin(128) 1028 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1029 } 1030 } 1031 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,c_gt_8_with_qmax)1032 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) { 1033 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1034 for (uint32_t channels = 9; channels < 16; channels++) { 1035 DWConvMicrokernelTester() 1036 .cr(8) 1037 .kr(9) 1038 .channels(channels) 1039 .qmax(128) 1040 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1041 } 1042 } 1043 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,multipixel)1044 
// Multi-pixel cases for xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2.
// Each test below sweeps channels = 1, 8, 15, 22, 29, 36 with cr = 8 and kr = 9.

// Width 3, default step / stride / clamping settings.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

// Width 3, additionally sweeping step over 2..9 for every channel count.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Width 5 with an explicit output stride of 43.
// The channel count must follow the loop variable: a fixed value would make all
// six iterations run the identical configuration. The largest channel count
// tested (36) stays below the output stride.
// NOTE(review): this file is generated; the same change belongs in the
// generator template so it is not lost on regeneration.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(5).output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

// Width 3 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 1098 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1099 for (size_t channels = 1; channels <= 40; channels += 7) { 1100 DWConvMicrokernelTester() 1101 .cr(8) 1102 .kr(9) 1103 .channels(channels) 1104 .width(3) 1105 .qmax(128) 1106 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1107 } 1108 } 1109 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,input_offset)1110 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, input_offset) { 1111 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1112 for (uint32_t channels = 16; channels < 128; channels += 24) { 1113 DWConvMicrokernelTester() 1114 .cr(8) 1115 .kr(9) 1116 .channels(channels) 1117 .input_offset(176) 1118 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1119 } 1120 } 1121 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2,zero)1122 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, zero) { 1123 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1124 for (uint32_t mz = 0; mz < 9; mz++) { 1125 for (uint32_t channels = 16; channels < 128; channels += 24) { 1126 DWConvMicrokernelTester() 1127 .cr(8) 1128 .kr(9) 1129 .channels(channels) 1130 .input_offset(176) 1131 .zero_index(mz) 1132 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1133 } 1134 } 1135 } 1136 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 1137 1138 1139 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_eq_8)1140 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_eq_8) { 1141 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1142 DWConvMicrokernelTester() 1143 .cr(8) 1144 .kr(25) 1145 .channels(8) 1146 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1147 } 1148 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_div_8)1149 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8) { 1150 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1151 for (uint32_t channels = 16; channels < 128; channels += 24) { 1152 DWConvMicrokernelTester() 1153 .cr(8) 1154 .kr(25) 1155 .channels(channels) 1156 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1157 } 1158 } 1159 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_div_8_with_qmin)1160 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmin) { 1161 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1162 for (uint32_t channels = 16; channels < 128; channels += 24) { 1163 DWConvMicrokernelTester() 1164 .cr(8) 1165 .kr(25) 1166 .channels(channels) 1167 .qmin(128) 1168 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1169 } 1170 } 1171 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_div_8_with_qmax)1172 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmax) { 1173 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1174 for (uint32_t channels = 16; channels < 128; channels += 24) { 1175 DWConvMicrokernelTester() 1176 .cr(8) 1177 .kr(25) 1178 .channels(channels) 1179 .qmax(128) 1180 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1181 } 1182 } 1183 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_lt_8)1184 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_lt_8) { 1185 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1186 for (uint32_t channels = 1; channels < 8; channels++) { 1187 DWConvMicrokernelTester() 1188 .cr(8) 1189 .kr(25) 1190 .channels(channels) 1191 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1192 } 1193 } 1194 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_gt_8)1195 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8) { 1196 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1197 for (uint32_t channels = 9; channels < 16; channels++) { 1198 DWConvMicrokernelTester() 1199 .cr(8) 1200 .kr(25) 1201 .channels(channels) 1202 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, 
xnn_init_f16_minmax_neon_params); 1203 } 1204 } 1205 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_gt_8_with_qmin)1206 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmin) { 1207 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1208 for (uint32_t channels = 9; channels < 16; channels++) { 1209 DWConvMicrokernelTester() 1210 .cr(8) 1211 .kr(25) 1212 .channels(channels) 1213 .qmin(128) 1214 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1215 } 1216 } 1217 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_gt_8_with_qmax)1218 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmax) { 1219 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1220 for (uint32_t channels = 9; channels < 16; channels++) { 1221 DWConvMicrokernelTester() 1222 .cr(8) 1223 .kr(25) 1224 .channels(channels) 1225 .qmax(128) 1226 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1227 } 1228 } 1229 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel)1230 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel) { 1231 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1232 for (size_t channels = 1; channels <= 40; channels += 7) { 1233 DWConvMicrokernelTester() 1234 .cr(8) 1235 .kr(25) 1236 .channels(channels) 1237 .width(3) 1238 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1239 } 1240 } 1241 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_step)1242 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_step) { 1243 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1244 for (size_t channels = 1; channels <= 40; channels += 7) { 1245 for (size_t step = 2; step <= 25; step++) { 1246 DWConvMicrokernelTester() 1247 .cr(8) 1248 .kr(25) 1249 .channels(channels) 1250 .width(3) 1251 .step(step) 1252 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1253 } 1254 } 1255 } 1256 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_output_stride)1257 
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_output_stride) { 1258 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1259 for (size_t channels = 1; channels <= 40; channels += 7) { 1260 DWConvMicrokernelTester() 1261 .cr(8) 1262 .kr(25) 1263 .channels(8) 1264 .width(5) 1265 .output_stride(43) 1266 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1267 } 1268 } 1269 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_qmin)1270 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmin) { 1271 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1272 for (size_t channels = 1; channels <= 40; channels += 7) { 1273 DWConvMicrokernelTester() 1274 .cr(8) 1275 .kr(25) 1276 .channels(channels) 1277 .width(3) 1278 .qmin(128) 1279 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1280 } 1281 } 1282 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_qmax)1283 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmax) { 1284 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1285 for (size_t channels = 1; channels <= 40; channels += 7) { 1286 DWConvMicrokernelTester() 1287 .cr(8) 1288 .kr(25) 1289 .channels(channels) 1290 .width(3) 1291 .qmax(128) 1292 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1293 } 1294 } 1295 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,input_offset)1296 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, input_offset) { 1297 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1298 for (uint32_t channels = 16; channels < 128; channels += 24) { 1299 DWConvMicrokernelTester() 1300 .cr(8) 1301 .kr(25) 1302 .channels(channels) 1303 .input_offset(176) 1304 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1305 } 1306 } 1307 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,zero)1308 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, zero) { 1309 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1310 for (uint32_t mz = 0; mz < 
25; mz++) { 1311 for (uint32_t channels = 16; channels < 128; channels += 24) { 1312 DWConvMicrokernelTester() 1313 .cr(8) 1314 .kr(25) 1315 .channels(channels) 1316 .input_offset(176) 1317 .zero_index(mz) 1318 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 1319 } 1320 } 1321 } 1322 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 1323 1324 1325 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_eq_8)1326 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_eq_8) { 1327 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1328 DWConvMicrokernelTester() 1329 .cr(8) 1330 .kr(25) 1331 .channels(8) 1332 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1333 } 1334 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_div_8)1335 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8) { 1336 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1337 for (uint32_t channels = 16; channels < 128; channels += 24) { 1338 DWConvMicrokernelTester() 1339 .cr(8) 1340 .kr(25) 1341 .channels(channels) 1342 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1343 } 1344 } 1345 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_div_8_with_qmin)1346 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmin) { 1347 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1348 for (uint32_t channels = 16; channels < 128; channels += 24) { 1349 DWConvMicrokernelTester() 1350 .cr(8) 1351 .kr(25) 1352 .channels(channels) 1353 .qmin(128) 1354 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1355 } 1356 } 1357 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_div_8_with_qmax)1358 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmax) { 1359 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1360 for (uint32_t channels = 16; channels < 128; channels += 24) { 1361 
DWConvMicrokernelTester() 1362 .cr(8) 1363 .kr(25) 1364 .channels(channels) 1365 .qmax(128) 1366 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1367 } 1368 } 1369 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_lt_8)1370 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_lt_8) { 1371 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1372 for (uint32_t channels = 1; channels < 8; channels++) { 1373 DWConvMicrokernelTester() 1374 .cr(8) 1375 .kr(25) 1376 .channels(channels) 1377 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1378 } 1379 } 1380 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_gt_8)1381 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8) { 1382 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1383 for (uint32_t channels = 9; channels < 16; channels++) { 1384 DWConvMicrokernelTester() 1385 .cr(8) 1386 .kr(25) 1387 .channels(channels) 1388 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1389 } 1390 } 1391 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_gt_8_with_qmin)1392 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) { 1393 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1394 for (uint32_t channels = 9; channels < 16; channels++) { 1395 DWConvMicrokernelTester() 1396 .cr(8) 1397 .kr(25) 1398 .channels(channels) 1399 .qmin(128) 1400 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1401 } 1402 } 1403 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_gt_8_with_qmax)1404 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) { 1405 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1406 for (uint32_t channels = 9; channels < 16; channels++) { 1407 DWConvMicrokernelTester() 1408 .cr(8) 1409 .kr(25) 1410 .channels(channels) 1411 .qmax(128) 1412 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1413 } 1414 } 1415 
// Multi-pixel cases for xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2.
// Each test below sweeps channels = 1, 8, 15, 22, 29, 36 with cr = 8 and kr = 25.

// Width 3, default step / stride / clamping settings.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

// Width 3, additionally sweeping step over 2..25 for every channel count.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Width 5 with an explicit output stride of 43.
// The channel count must follow the loop variable: a fixed value would make all
// six iterations run the identical configuration. The largest channel count
// tested (36) stays below the output stride.
// NOTE(review): this file is generated; the same change belongs in the
// generator template so it is not lost on regeneration.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(5).output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

// Width 3 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,multipixel_with_qmax)1469 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 1470 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1471 for (size_t channels = 1; channels <= 40; channels += 7) { 1472 DWConvMicrokernelTester() 1473 .cr(8) 1474 .kr(25) 1475 .channels(channels) 1476 .width(3) 1477 .qmax(128) 1478 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1479 } 1480 } 1481 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,input_offset)1482 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, input_offset) { 1483 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1484 for (uint32_t channels = 16; channels < 128; channels += 24) { 1485 DWConvMicrokernelTester() 1486 .cr(8) 1487 .kr(25) 1488 .channels(channels) 1489 .input_offset(176) 1490 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1491 } 1492 } 1493 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,zero)1494 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, zero) { 1495 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1496 for (uint32_t mz = 0; mz < 25; mz++) { 1497 for (uint32_t channels = 16; channels < 128; channels += 24) { 1498 DWConvMicrokernelTester() 1499 .cr(8) 1500 .kr(25) 1501 .channels(channels) 1502 .input_offset(176) 1503 .zero_index(mz) 1504 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1505 } 1506 } 1507 } 1508 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 1509 1510 1511 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_eq_16)1512 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_eq_16) { 1513 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1514 DWConvMicrokernelTester() 1515 .cr(16) 1516 .kr(3) 1517 .channels(16) 1518 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1519 } 1520 
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_div_16)1521 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_div_16) { 1522 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1523 for (uint32_t channels = 32; channels < 256; channels += 48) { 1524 DWConvMicrokernelTester() 1525 .cr(16) 1526 .kr(3) 1527 .channels(channels) 1528 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1529 } 1530 } 1531 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_div_16_with_qmin)1532 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_div_16_with_qmin) { 1533 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1534 for (uint32_t channels = 32; channels < 256; channels += 48) { 1535 DWConvMicrokernelTester() 1536 .cr(16) 1537 .kr(3) 1538 .channels(channels) 1539 .qmin(128) 1540 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1541 } 1542 } 1543 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_div_16_with_qmax)1544 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_div_16_with_qmax) { 1545 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1546 for (uint32_t channels = 32; channels < 256; channels += 48) { 1547 DWConvMicrokernelTester() 1548 .cr(16) 1549 .kr(3) 1550 .channels(channels) 1551 .qmax(128) 1552 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1553 } 1554 } 1555 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_lt_16)1556 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_lt_16) { 1557 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1558 for (uint32_t channels = 1; channels < 16; channels++) { 1559 DWConvMicrokernelTester() 1560 .cr(16) 1561 .kr(3) 1562 .channels(channels) 1563 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1564 } 1565 } 1566 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_gt_16)1567 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_gt_16) { 1568 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1569 for (uint32_t channels = 17; channels < 32; channels++) { 1570 
DWConvMicrokernelTester() 1571 .cr(16) 1572 .kr(3) 1573 .channels(channels) 1574 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1575 } 1576 } 1577 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_gt_16_with_qmin)1578 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_gt_16_with_qmin) { 1579 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1580 for (uint32_t channels = 17; channels < 32; channels++) { 1581 DWConvMicrokernelTester() 1582 .cr(16) 1583 .kr(3) 1584 .channels(channels) 1585 .qmin(128) 1586 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1587 } 1588 } 1589 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,c_gt_16_with_qmax)1590 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, c_gt_16_with_qmax) { 1591 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1592 for (uint32_t channels = 17; channels < 32; channels++) { 1593 DWConvMicrokernelTester() 1594 .cr(16) 1595 .kr(3) 1596 .channels(channels) 1597 .qmax(128) 1598 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1599 } 1600 } 1601 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,multipixel)1602 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel) { 1603 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1604 for (size_t channels = 1; channels <= 80; channels += 15) { 1605 DWConvMicrokernelTester() 1606 .cr(16) 1607 .kr(3) 1608 .channels(channels) 1609 .width(3) 1610 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1611 } 1612 } 1613 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,multipixel_with_step)1614 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_step) { 1615 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1616 for (size_t channels = 1; channels <= 80; channels += 15) { 1617 for (size_t step = 2; step <= 3; step++) { 1618 DWConvMicrokernelTester() 1619 .cr(16) 1620 .kr(3) 1621 .channels(channels) 1622 .width(3) 1623 .step(step) 1624 
.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1625 } 1626 } 1627 } 1628 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,multipixel_with_output_stride)1629 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_output_stride) { 1630 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1631 for (size_t channels = 1; channels <= 80; channels += 15) { 1632 DWConvMicrokernelTester() 1633 .cr(16) 1634 .kr(3) 1635 .channels(16) 1636 .width(5) 1637 .output_stride(83) 1638 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1639 } 1640 } 1641 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,multipixel_with_qmin)1642 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_qmin) { 1643 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1644 for (size_t channels = 1; channels <= 80; channels += 15) { 1645 DWConvMicrokernelTester() 1646 .cr(16) 1647 .kr(3) 1648 .channels(channels) 1649 .width(3) 1650 .qmin(128) 1651 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1652 } 1653 } 1654 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,multipixel_with_qmax)1655 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, multipixel_with_qmax) { 1656 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1657 for (size_t channels = 1; channels <= 80; channels += 15) { 1658 DWConvMicrokernelTester() 1659 .cr(16) 1660 .kr(3) 1661 .channels(channels) 1662 .width(3) 1663 .qmax(128) 1664 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1665 } 1666 } 1667 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,input_offset)1668 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, input_offset) { 1669 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1670 for (uint32_t channels = 32; channels < 256; channels += 48) { 1671 DWConvMicrokernelTester() 1672 .cr(16) 1673 .kr(3) 1674 .channels(channels) 1675 .input_offset(304) 1676 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 
1677 } 1678 } 1679 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH,zero)1680 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH, zero) { 1681 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1682 for (uint32_t mz = 0; mz < 3; mz++) { 1683 for (uint32_t channels = 32; channels < 256; channels += 48) { 1684 DWConvMicrokernelTester() 1685 .cr(16) 1686 .kr(3) 1687 .channels(channels) 1688 .input_offset(304) 1689 .zero_index(mz) 1690 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 1691 } 1692 } 1693 } 1694 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 1695 1696 1697 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_eq_16)1698 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_eq_16) { 1699 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1700 DWConvMicrokernelTester() 1701 .cr(16) 1702 .kr(3) 1703 .channels(16) 1704 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1705 } 1706 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_div_16)1707 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_div_16) { 1708 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1709 for (uint32_t channels = 32; channels < 256; channels += 48) { 1710 DWConvMicrokernelTester() 1711 .cr(16) 1712 .kr(3) 1713 .channels(channels) 1714 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1715 } 1716 } 1717 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_div_16_with_qmin)1718 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_div_16_with_qmin) { 1719 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1720 for (uint32_t channels = 32; channels < 256; channels += 48) { 1721 DWConvMicrokernelTester() 1722 .cr(16) 1723 .kr(3) 1724 .channels(channels) 1725 .qmin(128) 1726 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1727 } 1728 } 1729 
TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_div_16_with_qmax)1730 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_div_16_with_qmax) { 1731 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1732 for (uint32_t channels = 32; channels < 256; channels += 48) { 1733 DWConvMicrokernelTester() 1734 .cr(16) 1735 .kr(3) 1736 .channels(channels) 1737 .qmax(128) 1738 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1739 } 1740 } 1741 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_lt_16)1742 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_lt_16) { 1743 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1744 for (uint32_t channels = 1; channels < 16; channels++) { 1745 DWConvMicrokernelTester() 1746 .cr(16) 1747 .kr(3) 1748 .channels(channels) 1749 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1750 } 1751 } 1752 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_gt_16)1753 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_gt_16) { 1754 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1755 for (uint32_t channels = 17; channels < 32; channels++) { 1756 DWConvMicrokernelTester() 1757 .cr(16) 1758 .kr(3) 1759 .channels(channels) 1760 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1761 } 1762 } 1763 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_gt_16_with_qmin)1764 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) { 1765 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1766 for (uint32_t channels = 17; channels < 32; channels++) { 1767 DWConvMicrokernelTester() 1768 .cr(16) 1769 .kr(3) 1770 .channels(channels) 1771 .qmin(128) 1772 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1773 } 1774 } 1775 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,c_gt_16_with_qmax)1776 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) { 1777 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1778 for 
(uint32_t channels = 17; channels < 32; channels++) { 1779 DWConvMicrokernelTester() 1780 .cr(16) 1781 .kr(3) 1782 .channels(channels) 1783 .qmax(128) 1784 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1785 } 1786 } 1787 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,multipixel)1788 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel) { 1789 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1790 for (size_t channels = 1; channels <= 80; channels += 15) { 1791 DWConvMicrokernelTester() 1792 .cr(16) 1793 .kr(3) 1794 .channels(channels) 1795 .width(3) 1796 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1797 } 1798 } 1799 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,multipixel_with_step)1800 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_step) { 1801 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1802 for (size_t channels = 1; channels <= 80; channels += 15) { 1803 for (size_t step = 2; step <= 3; step++) { 1804 DWConvMicrokernelTester() 1805 .cr(16) 1806 .kr(3) 1807 .channels(channels) 1808 .width(3) 1809 .step(step) 1810 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1811 } 1812 } 1813 } 1814 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,multipixel_with_output_stride)1815 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 1816 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1817 for (size_t channels = 1; channels <= 80; channels += 15) { 1818 DWConvMicrokernelTester() 1819 .cr(16) 1820 .kr(3) 1821 .channels(16) 1822 .width(5) 1823 .output_stride(83) 1824 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1825 } 1826 } 1827 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,multipixel_with_qmin)1828 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 1829 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1830 for (size_t channels 
= 1; channels <= 80; channels += 15) { 1831 DWConvMicrokernelTester() 1832 .cr(16) 1833 .kr(3) 1834 .channels(channels) 1835 .width(3) 1836 .qmin(128) 1837 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1838 } 1839 } 1840 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,multipixel_with_qmax)1841 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 1842 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1843 for (size_t channels = 1; channels <= 80; channels += 15) { 1844 DWConvMicrokernelTester() 1845 .cr(16) 1846 .kr(3) 1847 .channels(channels) 1848 .width(3) 1849 .qmax(128) 1850 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1851 } 1852 } 1853 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,input_offset)1854 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, input_offset) { 1855 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1856 for (uint32_t channels = 32; channels < 256; channels += 48) { 1857 DWConvMicrokernelTester() 1858 .cr(16) 1859 .kr(3) 1860 .channels(channels) 1861 .input_offset(304) 1862 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1863 } 1864 } 1865 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2,zero)1866 TEST(F16_DWCONV_MINMAX_UP16X3__NEONFP16ARITH_ACC2, zero) { 1867 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1868 for (uint32_t mz = 0; mz < 3; mz++) { 1869 for (uint32_t channels = 32; channels < 256; channels += 48) { 1870 DWConvMicrokernelTester() 1871 .cr(16) 1872 .kr(3) 1873 .channels(channels) 1874 .input_offset(304) 1875 .zero_index(mz) 1876 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 1877 } 1878 } 1879 } 1880 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 1881 1882 1883 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_eq_16)1884 
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_eq_16) { 1885 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1886 DWConvMicrokernelTester() 1887 .cr(16) 1888 .kr(4) 1889 .channels(16) 1890 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1891 } 1892 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_div_16)1893 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16) { 1894 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1895 for (uint32_t channels = 32; channels < 256; channels += 48) { 1896 DWConvMicrokernelTester() 1897 .cr(16) 1898 .kr(4) 1899 .channels(channels) 1900 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1901 } 1902 } 1903 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_div_16_with_qmin)1904 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmin) { 1905 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1906 for (uint32_t channels = 32; channels < 256; channels += 48) { 1907 DWConvMicrokernelTester() 1908 .cr(16) 1909 .kr(4) 1910 .channels(channels) 1911 .qmin(128) 1912 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1913 } 1914 } 1915 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_div_16_with_qmax)1916 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmax) { 1917 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1918 for (uint32_t channels = 32; channels < 256; channels += 48) { 1919 DWConvMicrokernelTester() 1920 .cr(16) 1921 .kr(4) 1922 .channels(channels) 1923 .qmax(128) 1924 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1925 } 1926 } 1927 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_lt_16)1928 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_lt_16) { 1929 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1930 for (uint32_t channels = 1; channels < 16; channels++) { 1931 DWConvMicrokernelTester() 1932 .cr(16) 1933 .kr(4) 1934 .channels(channels) 1935 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, 
xnn_init_f16_minmax_neon_params); 1936 } 1937 } 1938 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_gt_16)1939 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16) { 1940 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1941 for (uint32_t channels = 17; channels < 32; channels++) { 1942 DWConvMicrokernelTester() 1943 .cr(16) 1944 .kr(4) 1945 .channels(channels) 1946 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1947 } 1948 } 1949 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_gt_16_with_qmin)1950 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmin) { 1951 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1952 for (uint32_t channels = 17; channels < 32; channels++) { 1953 DWConvMicrokernelTester() 1954 .cr(16) 1955 .kr(4) 1956 .channels(channels) 1957 .qmin(128) 1958 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1959 } 1960 } 1961 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_gt_16_with_qmax)1962 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmax) { 1963 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1964 for (uint32_t channels = 17; channels < 32; channels++) { 1965 DWConvMicrokernelTester() 1966 .cr(16) 1967 .kr(4) 1968 .channels(channels) 1969 .qmax(128) 1970 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1971 } 1972 } 1973 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel)1974 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel) { 1975 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1976 for (size_t channels = 1; channels <= 80; channels += 15) { 1977 DWConvMicrokernelTester() 1978 .cr(16) 1979 .kr(4) 1980 .channels(channels) 1981 .width(3) 1982 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1983 } 1984 } 1985 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_step)1986 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_step) { 1987 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1988 
for (size_t channels = 1; channels <= 80; channels += 15) { 1989 for (size_t step = 2; step <= 4; step++) { 1990 DWConvMicrokernelTester() 1991 .cr(16) 1992 .kr(4) 1993 .channels(channels) 1994 .width(3) 1995 .step(step) 1996 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 1997 } 1998 } 1999 } 2000 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_output_stride)2001 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_output_stride) { 2002 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2003 for (size_t channels = 1; channels <= 80; channels += 15) { 2004 DWConvMicrokernelTester() 2005 .cr(16) 2006 .kr(4) 2007 .channels(16) 2008 .width(5) 2009 .output_stride(83) 2010 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 2011 } 2012 } 2013 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_qmin)2014 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmin) { 2015 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2016 for (size_t channels = 1; channels <= 80; channels += 15) { 2017 DWConvMicrokernelTester() 2018 .cr(16) 2019 .kr(4) 2020 .channels(channels) 2021 .width(3) 2022 .qmin(128) 2023 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 2024 } 2025 } 2026 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_qmax)2027 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmax) { 2028 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2029 for (size_t channels = 1; channels <= 80; channels += 15) { 2030 DWConvMicrokernelTester() 2031 .cr(16) 2032 .kr(4) 2033 .channels(channels) 2034 .width(3) 2035 .qmax(128) 2036 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 2037 } 2038 } 2039 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,input_offset)2040 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, input_offset) { 2041 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2042 for (uint32_t channels = 32; channels < 
256; channels += 48) { 2043 DWConvMicrokernelTester() 2044 .cr(16) 2045 .kr(4) 2046 .channels(channels) 2047 .input_offset(304) 2048 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 2049 } 2050 } 2051 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,zero)2052 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, zero) { 2053 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2054 for (uint32_t mz = 0; mz < 4; mz++) { 2055 for (uint32_t channels = 32; channels < 256; channels += 48) { 2056 DWConvMicrokernelTester() 2057 .cr(16) 2058 .kr(4) 2059 .channels(channels) 2060 .input_offset(304) 2061 .zero_index(mz) 2062 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 2063 } 2064 } 2065 } 2066 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 2067 2068 2069 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_eq_16)2070 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_eq_16) { 2071 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2072 DWConvMicrokernelTester() 2073 .cr(16) 2074 .kr(4) 2075 .channels(16) 2076 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2077 } 2078 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_div_16)2079 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16) { 2080 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2081 for (uint32_t channels = 32; channels < 256; channels += 48) { 2082 DWConvMicrokernelTester() 2083 .cr(16) 2084 .kr(4) 2085 .channels(channels) 2086 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2087 } 2088 } 2089 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_div_16_with_qmin)2090 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmin) { 2091 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2092 for (uint32_t channels = 32; channels < 256; channels += 48) { 2093 DWConvMicrokernelTester() 2094 .cr(16) 2095 .kr(4) 
2096 .channels(channels) 2097 .qmin(128) 2098 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2099 } 2100 } 2101 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_div_16_with_qmax)2102 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmax) { 2103 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2104 for (uint32_t channels = 32; channels < 256; channels += 48) { 2105 DWConvMicrokernelTester() 2106 .cr(16) 2107 .kr(4) 2108 .channels(channels) 2109 .qmax(128) 2110 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2111 } 2112 } 2113 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_lt_16)2114 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_lt_16) { 2115 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2116 for (uint32_t channels = 1; channels < 16; channels++) { 2117 DWConvMicrokernelTester() 2118 .cr(16) 2119 .kr(4) 2120 .channels(channels) 2121 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2122 } 2123 } 2124 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_gt_16)2125 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16) { 2126 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2127 for (uint32_t channels = 17; channels < 32; channels++) { 2128 DWConvMicrokernelTester() 2129 .cr(16) 2130 .kr(4) 2131 .channels(channels) 2132 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2133 } 2134 } 2135 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_gt_16_with_qmin)2136 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) { 2137 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2138 for (uint32_t channels = 17; channels < 32; channels++) { 2139 DWConvMicrokernelTester() 2140 .cr(16) 2141 .kr(4) 2142 .channels(channels) 2143 .qmin(128) 2144 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2145 } 2146 } 2147 
// Channel counts 17..31 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

// Width of 3 with channel counts 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

// Width of 3, combined with every step value from 2 up to kr (4).
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 4; ++s) {
      DWConvMicrokernelTester().cr(16).kr(4).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Width of 5 with an explicit output stride of 83.
// Fix: the channel count now follows the loop variable. It used to be pinned
// to 16, so all six iterations ran the exact same configuration.
// The largest count visited is 76, which stays below the stride of 83
// (presumably the tester requires output_stride >= channels; confirm).
// NOTE(review): this file is produced by tools/generate-dwconv-test.py; apply
// the same change in the generator or it will be lost on regeneration.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(c).width(5).output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,multipixel_with_qmin)2200 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 2201 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2202 for (size_t channels = 1; channels <= 80; channels += 15) { 2203 DWConvMicrokernelTester() 2204 .cr(16) 2205 .kr(4) 2206 .channels(channels) 2207 .width(3) 2208 .qmin(128) 2209 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2210 } 2211 } 2212 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,multipixel_with_qmax)2213 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 2214 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2215 for (size_t channels = 1; channels <= 80; channels += 15) { 2216 DWConvMicrokernelTester() 2217 .cr(16) 2218 .kr(4) 2219 .channels(channels) 2220 .width(3) 2221 .qmax(128) 2222 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2223 } 2224 } 2225 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,input_offset)2226 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, input_offset) { 2227 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2228 for (uint32_t channels = 32; channels < 256; channels += 48) { 2229 DWConvMicrokernelTester() 2230 .cr(16) 2231 .kr(4) 2232 .channels(channels) 2233 .input_offset(304) 2234 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2235 } 2236 } 2237 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,zero)2238 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, zero) { 2239 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2240 for (uint32_t mz = 0; mz < 4; mz++) { 2241 for (uint32_t channels = 32; channels < 256; channels += 48) { 2242 DWConvMicrokernelTester() 2243 .cr(16) 2244 .kr(4) 2245 .channels(channels) 2246 .input_offset(304) 2247 .zero_index(mz) 2248 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2249 } 2250 } 2251 } 2252 #endif // 
XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 2253 2254 2255 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_eq_16)2256 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_eq_16) { 2257 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2258 DWConvMicrokernelTester() 2259 .cr(16) 2260 .kr(9) 2261 .channels(16) 2262 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2263 } 2264 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_div_16)2265 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16) { 2266 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2267 for (uint32_t channels = 32; channels < 256; channels += 48) { 2268 DWConvMicrokernelTester() 2269 .cr(16) 2270 .kr(9) 2271 .channels(channels) 2272 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2273 } 2274 } 2275 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_div_16_with_qmin)2276 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmin) { 2277 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2278 for (uint32_t channels = 32; channels < 256; channels += 48) { 2279 DWConvMicrokernelTester() 2280 .cr(16) 2281 .kr(9) 2282 .channels(channels) 2283 .qmin(128) 2284 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2285 } 2286 } 2287 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_div_16_with_qmax)2288 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmax) { 2289 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2290 for (uint32_t channels = 32; channels < 256; channels += 48) { 2291 DWConvMicrokernelTester() 2292 .cr(16) 2293 .kr(9) 2294 .channels(channels) 2295 .qmax(128) 2296 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2297 } 2298 } 2299 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_lt_16)2300 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_lt_16) { 2301 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2302 for (uint32_t channels = 1; 
channels < 16; channels++) { 2303 DWConvMicrokernelTester() 2304 .cr(16) 2305 .kr(9) 2306 .channels(channels) 2307 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2308 } 2309 } 2310 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_gt_16)2311 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16) { 2312 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2313 for (uint32_t channels = 17; channels < 32; channels++) { 2314 DWConvMicrokernelTester() 2315 .cr(16) 2316 .kr(9) 2317 .channels(channels) 2318 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2319 } 2320 } 2321 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_gt_16_with_qmin)2322 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmin) { 2323 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2324 for (uint32_t channels = 17; channels < 32; channels++) { 2325 DWConvMicrokernelTester() 2326 .cr(16) 2327 .kr(9) 2328 .channels(channels) 2329 .qmin(128) 2330 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2331 } 2332 } 2333 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_gt_16_with_qmax)2334 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmax) { 2335 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2336 for (uint32_t channels = 17; channels < 32; channels++) { 2337 DWConvMicrokernelTester() 2338 .cr(16) 2339 .kr(9) 2340 .channels(channels) 2341 .qmax(128) 2342 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2343 } 2344 } 2345 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,multipixel)2346 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel) { 2347 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2348 for (size_t channels = 1; channels <= 80; channels += 15) { 2349 DWConvMicrokernelTester() 2350 .cr(16) 2351 .kr(9) 2352 .channels(channels) 2353 .width(3) 2354 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2355 } 2356 } 2357 
// Width of 3, combined with every step value from 2 up to kr (9).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Width of 5 with an explicit output stride of 83.
// Fix: the channel count now follows the loop variable. It used to be pinned
// to 16, so all six iterations ran the exact same configuration.
// The largest count visited is 76, which stays below the stride of 83
// (presumably the tester requires output_stride >= channels; confirm).
// NOTE(review): this file is produced by tools/generate-dwconv-test.py; apply
// the same change in the generator or it will be lost on regeneration.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(5).output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}

// Width of 3 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}

// Width of 3 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,input_offset)2412 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, input_offset) { 2413 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2414 for (uint32_t channels = 32; channels < 256; channels += 48) { 2415 DWConvMicrokernelTester() 2416 .cr(16) 2417 .kr(9) 2418 .channels(channels) 2419 .input_offset(304) 2420 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2421 } 2422 } 2423 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,zero)2424 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, zero) { 2425 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2426 for (uint32_t mz = 0; mz < 9; mz++) { 2427 for (uint32_t channels = 32; channels < 256; channels += 48) { 2428 DWConvMicrokernelTester() 2429 .cr(16) 2430 .kr(9) 2431 .channels(channels) 2432 .input_offset(304) 2433 .zero_index(mz) 2434 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 2435 } 2436 } 2437 } 2438 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 2439 2440 2441 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_eq_16)2442 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_eq_16) { 2443 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2444 DWConvMicrokernelTester() 2445 .cr(16) 2446 .kr(9) 2447 .channels(16) 2448 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2449 } 2450 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_div_16)2451 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16) { 2452 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2453 for (uint32_t channels = 32; channels < 256; channels += 48) { 2454 DWConvMicrokernelTester() 2455 .cr(16) 2456 .kr(9) 2457 .channels(channels) 2458 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2459 } 2460 } 2461 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_div_16_with_qmin)2462 
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmin) { 2463 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2464 for (uint32_t channels = 32; channels < 256; channels += 48) { 2465 DWConvMicrokernelTester() 2466 .cr(16) 2467 .kr(9) 2468 .channels(channels) 2469 .qmin(128) 2470 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2471 } 2472 } 2473 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_div_16_with_qmax)2474 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmax) { 2475 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2476 for (uint32_t channels = 32; channels < 256; channels += 48) { 2477 DWConvMicrokernelTester() 2478 .cr(16) 2479 .kr(9) 2480 .channels(channels) 2481 .qmax(128) 2482 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2483 } 2484 } 2485 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_lt_16)2486 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_lt_16) { 2487 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2488 for (uint32_t channels = 1; channels < 16; channels++) { 2489 DWConvMicrokernelTester() 2490 .cr(16) 2491 .kr(9) 2492 .channels(channels) 2493 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2494 } 2495 } 2496 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_gt_16)2497 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16) { 2498 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2499 for (uint32_t channels = 17; channels < 32; channels++) { 2500 DWConvMicrokernelTester() 2501 .cr(16) 2502 .kr(9) 2503 .channels(channels) 2504 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2505 } 2506 } 2507 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_gt_16_with_qmin)2508 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) { 2509 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2510 for (uint32_t channels = 17; channels < 32; channels++) { 2511 
DWConvMicrokernelTester() 2512 .cr(16) 2513 .kr(9) 2514 .channels(channels) 2515 .qmin(128) 2516 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2517 } 2518 } 2519 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_gt_16_with_qmax)2520 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) { 2521 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2522 for (uint32_t channels = 17; channels < 32; channels++) { 2523 DWConvMicrokernelTester() 2524 .cr(16) 2525 .kr(9) 2526 .channels(channels) 2527 .qmax(128) 2528 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2529 } 2530 } 2531 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel)2532 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel) { 2533 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2534 for (size_t channels = 1; channels <= 80; channels += 15) { 2535 DWConvMicrokernelTester() 2536 .cr(16) 2537 .kr(9) 2538 .channels(channels) 2539 .width(3) 2540 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2541 } 2542 } 2543 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_step)2544 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_step) { 2545 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2546 for (size_t channels = 1; channels <= 80; channels += 15) { 2547 for (size_t step = 2; step <= 9; step++) { 2548 DWConvMicrokernelTester() 2549 .cr(16) 2550 .kr(9) 2551 .channels(channels) 2552 .width(3) 2553 .step(step) 2554 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2555 } 2556 } 2557 } 2558 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_output_stride)2559 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 2560 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2561 for (size_t channels = 1; channels <= 80; channels += 15) { 2562 DWConvMicrokernelTester() 2563 .cr(16) 
2564 .kr(9) 2565 .channels(16) 2566 .width(5) 2567 .output_stride(83) 2568 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2569 } 2570 } 2571 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_qmin)2572 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 2573 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2574 for (size_t channels = 1; channels <= 80; channels += 15) { 2575 DWConvMicrokernelTester() 2576 .cr(16) 2577 .kr(9) 2578 .channels(channels) 2579 .width(3) 2580 .qmin(128) 2581 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2582 } 2583 } 2584 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_qmax)2585 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 2586 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2587 for (size_t channels = 1; channels <= 80; channels += 15) { 2588 DWConvMicrokernelTester() 2589 .cr(16) 2590 .kr(9) 2591 .channels(channels) 2592 .width(3) 2593 .qmax(128) 2594 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2595 } 2596 } 2597 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,input_offset)2598 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, input_offset) { 2599 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2600 for (uint32_t channels = 32; channels < 256; channels += 48) { 2601 DWConvMicrokernelTester() 2602 .cr(16) 2603 .kr(9) 2604 .channels(channels) 2605 .input_offset(304) 2606 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2607 } 2608 } 2609 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,zero)2610 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, zero) { 2611 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2612 for (uint32_t mz = 0; mz < 9; mz++) { 2613 for (uint32_t channels = 32; channels < 256; channels += 48) { 2614 DWConvMicrokernelTester() 2615 .cr(16) 2616 .kr(9) 2617 .channels(channels) 2618 
.input_offset(304) 2619 .zero_index(mz) 2620 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2621 } 2622 } 2623 } 2624 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 2625 2626 2627 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_eq_16)2628 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_eq_16) { 2629 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2630 DWConvMicrokernelTester() 2631 .cr(16) 2632 .kr(25) 2633 .channels(16) 2634 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2635 } 2636 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_div_16)2637 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16) { 2638 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2639 for (uint32_t channels = 32; channels < 256; channels += 48) { 2640 DWConvMicrokernelTester() 2641 .cr(16) 2642 .kr(25) 2643 .channels(channels) 2644 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2645 } 2646 } 2647 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_div_16_with_qmin)2648 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmin) { 2649 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2650 for (uint32_t channels = 32; channels < 256; channels += 48) { 2651 DWConvMicrokernelTester() 2652 .cr(16) 2653 .kr(25) 2654 .channels(channels) 2655 .qmin(128) 2656 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2657 } 2658 } 2659 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_div_16_with_qmax)2660 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmax) { 2661 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2662 for (uint32_t channels = 32; channels < 256; channels += 48) { 2663 DWConvMicrokernelTester() 2664 .cr(16) 2665 .kr(25) 2666 .channels(channels) 2667 .qmax(128) 2668 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2669 } 2670 
} 2671 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_lt_16)2672 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_lt_16) { 2673 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2674 for (uint32_t channels = 1; channels < 16; channels++) { 2675 DWConvMicrokernelTester() 2676 .cr(16) 2677 .kr(25) 2678 .channels(channels) 2679 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2680 } 2681 } 2682 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_gt_16)2683 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16) { 2684 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2685 for (uint32_t channels = 17; channels < 32; channels++) { 2686 DWConvMicrokernelTester() 2687 .cr(16) 2688 .kr(25) 2689 .channels(channels) 2690 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2691 } 2692 } 2693 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_gt_16_with_qmin)2694 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmin) { 2695 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2696 for (uint32_t channels = 17; channels < 32; channels++) { 2697 DWConvMicrokernelTester() 2698 .cr(16) 2699 .kr(25) 2700 .channels(channels) 2701 .qmin(128) 2702 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2703 } 2704 } 2705 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_gt_16_with_qmax)2706 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmax) { 2707 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2708 for (uint32_t channels = 17; channels < 32; channels++) { 2709 DWConvMicrokernelTester() 2710 .cr(16) 2711 .kr(25) 2712 .channels(channels) 2713 .qmax(128) 2714 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2715 } 2716 } 2717 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel)2718 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel) { 2719 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2720 for (size_t channels = 1; channels <= 80; channels += 15) { 2721 
DWConvMicrokernelTester() 2722 .cr(16) 2723 .kr(25) 2724 .channels(channels) 2725 .width(3) 2726 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2727 } 2728 } 2729 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_step)2730 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_step) { 2731 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2732 for (size_t channels = 1; channels <= 80; channels += 15) { 2733 for (size_t step = 2; step <= 25; step++) { 2734 DWConvMicrokernelTester() 2735 .cr(16) 2736 .kr(25) 2737 .channels(channels) 2738 .width(3) 2739 .step(step) 2740 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2741 } 2742 } 2743 } 2744 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_output_stride)2745 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_output_stride) { 2746 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2747 for (size_t channels = 1; channels <= 80; channels += 15) { 2748 DWConvMicrokernelTester() 2749 .cr(16) 2750 .kr(25) 2751 .channels(16) 2752 .width(5) 2753 .output_stride(83) 2754 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2755 } 2756 } 2757 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_qmin)2758 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmin) { 2759 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2760 for (size_t channels = 1; channels <= 80; channels += 15) { 2761 DWConvMicrokernelTester() 2762 .cr(16) 2763 .kr(25) 2764 .channels(channels) 2765 .width(3) 2766 .qmin(128) 2767 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2768 } 2769 } 2770 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_qmax)2771 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmax) { 2772 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2773 for (size_t channels = 1; channels <= 80; channels += 15) { 2774 
DWConvMicrokernelTester() 2775 .cr(16) 2776 .kr(25) 2777 .channels(channels) 2778 .width(3) 2779 .qmax(128) 2780 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2781 } 2782 } 2783 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,input_offset)2784 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, input_offset) { 2785 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2786 for (uint32_t channels = 32; channels < 256; channels += 48) { 2787 DWConvMicrokernelTester() 2788 .cr(16) 2789 .kr(25) 2790 .channels(channels) 2791 .input_offset(304) 2792 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2793 } 2794 } 2795 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,zero)2796 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, zero) { 2797 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2798 for (uint32_t mz = 0; mz < 25; mz++) { 2799 for (uint32_t channels = 32; channels < 256; channels += 48) { 2800 DWConvMicrokernelTester() 2801 .cr(16) 2802 .kr(25) 2803 .channels(channels) 2804 .input_offset(304) 2805 .zero_index(mz) 2806 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 2807 } 2808 } 2809 } 2810 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 2811 2812 2813 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_eq_16)2814 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_eq_16) { 2815 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2816 DWConvMicrokernelTester() 2817 .cr(16) 2818 .kr(25) 2819 .channels(16) 2820 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2821 } 2822 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_div_16)2823 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16) { 2824 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2825 for (uint32_t channels = 32; channels < 256; channels += 48) { 2826 DWConvMicrokernelTester() 2827 .cr(16) 2828 .kr(25) 2829 
.channels(channels) 2830 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2831 } 2832 } 2833 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_div_16_with_qmin)2834 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmin) { 2835 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2836 for (uint32_t channels = 32; channels < 256; channels += 48) { 2837 DWConvMicrokernelTester() 2838 .cr(16) 2839 .kr(25) 2840 .channels(channels) 2841 .qmin(128) 2842 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2843 } 2844 } 2845 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_div_16_with_qmax)2846 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmax) { 2847 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2848 for (uint32_t channels = 32; channels < 256; channels += 48) { 2849 DWConvMicrokernelTester() 2850 .cr(16) 2851 .kr(25) 2852 .channels(channels) 2853 .qmax(128) 2854 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2855 } 2856 } 2857 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_lt_16)2858 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_lt_16) { 2859 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2860 for (uint32_t channels = 1; channels < 16; channels++) { 2861 DWConvMicrokernelTester() 2862 .cr(16) 2863 .kr(25) 2864 .channels(channels) 2865 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2866 } 2867 } 2868 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_gt_16)2869 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16) { 2870 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2871 for (uint32_t channels = 17; channels < 32; channels++) { 2872 DWConvMicrokernelTester() 2873 .cr(16) 2874 .kr(25) 2875 .channels(channels) 2876 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2877 } 2878 } 2879 
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Every channel count in [17, 31] (above cr = 16), with qmin(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Every channel count in [17, 31] (above cr = 16), with qmax(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(3) with channel counts 1, 16, 31, 46, 61, 76.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(3) combined with every step in [2, 25] for each channel count.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
    }
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(5) with output_stride(83). The loop variable is now forwarded to
  // channels(); the previous hard-coded channels(16) left it unused, so all six
  // iterations tested the same shape. The largest channel count in the sweep
  // (76) stays below the output stride (83).
  // NOTE: the file header marks this file as generated by
  // tools/generate-dwconv-test.py; mirror this change there.
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(5).output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(3) with channel counts 1, 16, 31, 46, 61, 76 and qmin(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(3) with channel counts 1, 16, 31, 46, 61, 76 and qmax(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,zero)2982 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, zero) { 2983 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2984 for (uint32_t mz = 0; mz < 25; mz++) { 2985 for (uint32_t channels = 32; channels < 256; channels += 48) { 2986 DWConvMicrokernelTester() 2987 .cr(16) 2988 .kr(25) 2989 .channels(channels) 2990 .input_offset(304) 2991 .zero_index(mz) 2992 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 2993 } 2994 } 2995 } 2996 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 2997 2998 2999 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_eq_32)3000 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_eq_32) { 3001 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3002 DWConvMicrokernelTester() 3003 .cr(32) 3004 .kr(3) 3005 .channels(32) 3006 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3007 } 3008 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_div_32)3009 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_div_32) { 3010 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3011 for (uint32_t channels = 64; channels < 512; channels += 96) { 3012 DWConvMicrokernelTester() 3013 .cr(32) 3014 .kr(3) 3015 .channels(channels) 3016 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3017 } 3018 } 3019 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_div_32_with_qmin)3020 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_div_32_with_qmin) { 3021 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3022 for (uint32_t channels = 64; channels < 512; channels += 96) { 3023 DWConvMicrokernelTester() 3024 .cr(32) 3025 .kr(3) 3026 .channels(channels) 3027 .qmin(128) 3028 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3029 } 3030 } 3031 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_div_32_with_qmax)3032 
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_div_32_with_qmax) { 3033 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3034 for (uint32_t channels = 64; channels < 512; channels += 96) { 3035 DWConvMicrokernelTester() 3036 .cr(32) 3037 .kr(3) 3038 .channels(channels) 3039 .qmax(128) 3040 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3041 } 3042 } 3043 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_lt_32)3044 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_lt_32) { 3045 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3046 for (uint32_t channels = 1; channels < 32; channels++) { 3047 DWConvMicrokernelTester() 3048 .cr(32) 3049 .kr(3) 3050 .channels(channels) 3051 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3052 } 3053 } 3054 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_gt_32)3055 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_gt_32) { 3056 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3057 for (uint32_t channels = 33; channels < 64; channels++) { 3058 DWConvMicrokernelTester() 3059 .cr(32) 3060 .kr(3) 3061 .channels(channels) 3062 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3063 } 3064 } 3065 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_gt_32_with_qmin)3066 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_gt_32_with_qmin) { 3067 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3068 for (uint32_t channels = 33; channels < 64; channels++) { 3069 DWConvMicrokernelTester() 3070 .cr(32) 3071 .kr(3) 3072 .channels(channels) 3073 .qmin(128) 3074 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3075 } 3076 } 3077 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,c_gt_32_with_qmax)3078 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, c_gt_32_with_qmax) { 3079 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3080 for (uint32_t channels = 33; channels < 64; channels++) { 3081 DWConvMicrokernelTester() 3082 .cr(32) 3083 .kr(3) 3084 .channels(channels) 3085 
.qmax(128) 3086 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3087 } 3088 } 3089 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,multipixel)3090 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel) { 3091 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3092 for (size_t channels = 1; channels <= 160; channels += 31) { 3093 DWConvMicrokernelTester() 3094 .cr(32) 3095 .kr(3) 3096 .channels(channels) 3097 .width(3) 3098 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3099 } 3100 } 3101 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,multipixel_with_step)3102 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_step) { 3103 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3104 for (size_t channels = 1; channels <= 160; channels += 31) { 3105 for (size_t step = 2; step <= 3; step++) { 3106 DWConvMicrokernelTester() 3107 .cr(32) 3108 .kr(3) 3109 .channels(channels) 3110 .width(3) 3111 .step(step) 3112 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3113 } 3114 } 3115 } 3116 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,multipixel_with_output_stride)3117 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_output_stride) { 3118 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3119 for (size_t channels = 1; channels <= 160; channels += 31) { 3120 DWConvMicrokernelTester() 3121 .cr(32) 3122 .kr(3) 3123 .channels(32) 3124 .width(5) 3125 .output_stride(163) 3126 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3127 } 3128 } 3129 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,multipixel_with_qmin)3130 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_qmin) { 3131 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3132 for (size_t channels = 1; channels <= 160; channels += 31) { 3133 DWConvMicrokernelTester() 3134 .cr(32) 3135 .kr(3) 3136 .channels(channels) 3137 .width(3) 3138 .qmin(128) 3139 
.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3140 } 3141 } 3142 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,multipixel_with_qmax)3143 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, multipixel_with_qmax) { 3144 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3145 for (size_t channels = 1; channels <= 160; channels += 31) { 3146 DWConvMicrokernelTester() 3147 .cr(32) 3148 .kr(3) 3149 .channels(channels) 3150 .width(3) 3151 .qmax(128) 3152 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3153 } 3154 } 3155 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,input_offset)3156 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, input_offset) { 3157 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3158 for (uint32_t channels = 64; channels < 512; channels += 96) { 3159 DWConvMicrokernelTester() 3160 .cr(32) 3161 .kr(3) 3162 .channels(channels) 3163 .input_offset(592) 3164 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3165 } 3166 } 3167 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH,zero)3168 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH, zero) { 3169 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3170 for (uint32_t mz = 0; mz < 3; mz++) { 3171 for (uint32_t channels = 64; channels < 512; channels += 96) { 3172 DWConvMicrokernelTester() 3173 .cr(32) 3174 .kr(3) 3175 .channels(channels) 3176 .input_offset(592) 3177 .zero_index(mz) 3178 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith, xnn_init_f16_minmax_neon_params); 3179 } 3180 } 3181 } 3182 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 3183 3184 3185 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_eq_32)3186 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_eq_32) { 3187 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3188 DWConvMicrokernelTester() 3189 .cr(32) 3190 .kr(3) 3191 .channels(32) 3192 
.Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3193 } 3194 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_div_32)3195 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_div_32) { 3196 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3197 for (uint32_t channels = 64; channels < 512; channels += 96) { 3198 DWConvMicrokernelTester() 3199 .cr(32) 3200 .kr(3) 3201 .channels(channels) 3202 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3203 } 3204 } 3205 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_div_32_with_qmin)3206 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_div_32_with_qmin) { 3207 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3208 for (uint32_t channels = 64; channels < 512; channels += 96) { 3209 DWConvMicrokernelTester() 3210 .cr(32) 3211 .kr(3) 3212 .channels(channels) 3213 .qmin(128) 3214 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3215 } 3216 } 3217 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_div_32_with_qmax)3218 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_div_32_with_qmax) { 3219 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3220 for (uint32_t channels = 64; channels < 512; channels += 96) { 3221 DWConvMicrokernelTester() 3222 .cr(32) 3223 .kr(3) 3224 .channels(channels) 3225 .qmax(128) 3226 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3227 } 3228 } 3229 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_lt_32)3230 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_lt_32) { 3231 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3232 for (uint32_t channels = 1; channels < 32; channels++) { 3233 DWConvMicrokernelTester() 3234 .cr(32) 3235 .kr(3) 3236 .channels(channels) 3237 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3238 } 3239 } 3240 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_gt_32)3241 
TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_gt_32) { 3242 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3243 for (uint32_t channels = 33; channels < 64; channels++) { 3244 DWConvMicrokernelTester() 3245 .cr(32) 3246 .kr(3) 3247 .channels(channels) 3248 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3249 } 3250 } 3251 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_gt_32_with_qmin)3252 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) { 3253 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3254 for (uint32_t channels = 33; channels < 64; channels++) { 3255 DWConvMicrokernelTester() 3256 .cr(32) 3257 .kr(3) 3258 .channels(channels) 3259 .qmin(128) 3260 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3261 } 3262 } 3263 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,c_gt_32_with_qmax)3264 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) { 3265 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3266 for (uint32_t channels = 33; channels < 64; channels++) { 3267 DWConvMicrokernelTester() 3268 .cr(32) 3269 .kr(3) 3270 .channels(channels) 3271 .qmax(128) 3272 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3273 } 3274 } 3275 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,multipixel)3276 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel) { 3277 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3278 for (size_t channels = 1; channels <= 160; channels += 31) { 3279 DWConvMicrokernelTester() 3280 .cr(32) 3281 .kr(3) 3282 .channels(channels) 3283 .width(3) 3284 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3285 } 3286 } 3287 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,multipixel_with_step)3288 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_step) { 3289 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3290 for (size_t channels = 1; channels <= 160; 
channels += 31) { 3291 for (size_t step = 2; step <= 3; step++) { 3292 DWConvMicrokernelTester() 3293 .cr(32) 3294 .kr(3) 3295 .channels(channels) 3296 .width(3) 3297 .step(step) 3298 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3299 } 3300 } 3301 } 3302 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,multipixel_with_output_stride)3303 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 3304 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3305 for (size_t channels = 1; channels <= 160; channels += 31) { 3306 DWConvMicrokernelTester() 3307 .cr(32) 3308 .kr(3) 3309 .channels(32) 3310 .width(5) 3311 .output_stride(163) 3312 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3313 } 3314 } 3315 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,multipixel_with_qmin)3316 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 3317 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3318 for (size_t channels = 1; channels <= 160; channels += 31) { 3319 DWConvMicrokernelTester() 3320 .cr(32) 3321 .kr(3) 3322 .channels(channels) 3323 .width(3) 3324 .qmin(128) 3325 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3326 } 3327 } 3328 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,multipixel_with_qmax)3329 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 3330 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3331 for (size_t channels = 1; channels <= 160; channels += 31) { 3332 DWConvMicrokernelTester() 3333 .cr(32) 3334 .kr(3) 3335 .channels(channels) 3336 .width(3) 3337 .qmax(128) 3338 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3339 } 3340 } 3341 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,input_offset)3342 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, input_offset) { 3343 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3344 for (uint32_t 
channels = 64; channels < 512; channels += 96) { 3345 DWConvMicrokernelTester() 3346 .cr(32) 3347 .kr(3) 3348 .channels(channels) 3349 .input_offset(592) 3350 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3351 } 3352 } 3353 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2,zero)3354 TEST(F16_DWCONV_MINMAX_UP32X3__NEONFP16ARITH_ACC2, zero) { 3355 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3356 for (uint32_t mz = 0; mz < 3; mz++) { 3357 for (uint32_t channels = 64; channels < 512; channels += 96) { 3358 DWConvMicrokernelTester() 3359 .cr(32) 3360 .kr(3) 3361 .channels(channels) 3362 .input_offset(592) 3363 .zero_index(mz) 3364 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3365 } 3366 } 3367 } 3368 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 3369 3370 3371 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_eq_32)3372 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_eq_32) { 3373 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3374 DWConvMicrokernelTester() 3375 .cr(32) 3376 .kr(4) 3377 .channels(32) 3378 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3379 } 3380 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_div_32)3381 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32) { 3382 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3383 for (uint32_t channels = 64; channels < 512; channels += 96) { 3384 DWConvMicrokernelTester() 3385 .cr(32) 3386 .kr(4) 3387 .channels(channels) 3388 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3389 } 3390 } 3391 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_div_32_with_qmin)3392 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32_with_qmin) { 3393 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3394 for (uint32_t channels = 64; channels < 512; channels += 96) { 3395 DWConvMicrokernelTester() 3396 .cr(32) 3397 
.kr(4) 3398 .channels(channels) 3399 .qmin(128) 3400 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3401 } 3402 } 3403 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_div_32_with_qmax)3404 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_div_32_with_qmax) { 3405 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3406 for (uint32_t channels = 64; channels < 512; channels += 96) { 3407 DWConvMicrokernelTester() 3408 .cr(32) 3409 .kr(4) 3410 .channels(channels) 3411 .qmax(128) 3412 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3413 } 3414 } 3415 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_lt_32)3416 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_lt_32) { 3417 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3418 for (uint32_t channels = 1; channels < 32; channels++) { 3419 DWConvMicrokernelTester() 3420 .cr(32) 3421 .kr(4) 3422 .channels(channels) 3423 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3424 } 3425 } 3426 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_gt_32)3427 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32) { 3428 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3429 for (uint32_t channels = 33; channels < 64; channels++) { 3430 DWConvMicrokernelTester() 3431 .cr(32) 3432 .kr(4) 3433 .channels(channels) 3434 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3435 } 3436 } 3437 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_gt_32_with_qmin)3438 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32_with_qmin) { 3439 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3440 for (uint32_t channels = 33; channels < 64; channels++) { 3441 DWConvMicrokernelTester() 3442 .cr(32) 3443 .kr(4) 3444 .channels(channels) 3445 .qmin(128) 3446 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3447 } 3448 } 3449 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,c_gt_32_with_qmax)3450 
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, c_gt_32_with_qmax) { 3451 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3452 for (uint32_t channels = 33; channels < 64; channels++) { 3453 DWConvMicrokernelTester() 3454 .cr(32) 3455 .kr(4) 3456 .channels(channels) 3457 .qmax(128) 3458 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3459 } 3460 } 3461 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,multipixel)3462 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel) { 3463 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3464 for (size_t channels = 1; channels <= 160; channels += 31) { 3465 DWConvMicrokernelTester() 3466 .cr(32) 3467 .kr(4) 3468 .channels(channels) 3469 .width(3) 3470 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3471 } 3472 } 3473 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,multipixel_with_step)3474 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_step) { 3475 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3476 for (size_t channels = 1; channels <= 160; channels += 31) { 3477 for (size_t step = 2; step <= 4; step++) { 3478 DWConvMicrokernelTester() 3479 .cr(32) 3480 .kr(4) 3481 .channels(channels) 3482 .width(3) 3483 .step(step) 3484 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3485 } 3486 } 3487 } 3488 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,multipixel_with_output_stride)3489 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_output_stride) { 3490 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3491 for (size_t channels = 1; channels <= 160; channels += 31) { 3492 DWConvMicrokernelTester() 3493 .cr(32) 3494 .kr(4) 3495 .channels(32) 3496 .width(5) 3497 .output_stride(163) 3498 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3499 } 3500 } 3501 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,multipixel_with_qmin)3502 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_qmin) { 3503 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3504 for (size_t channels = 1; channels <= 160; channels += 31) { 3505 DWConvMicrokernelTester() 3506 .cr(32) 3507 .kr(4) 3508 .channels(channels) 3509 .width(3) 3510 .qmin(128) 3511 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3512 } 3513 } 3514 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,multipixel_with_qmax)3515 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, multipixel_with_qmax) { 3516 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3517 for (size_t channels = 1; channels <= 160; channels += 31) { 3518 DWConvMicrokernelTester() 3519 .cr(32) 3520 .kr(4) 3521 .channels(channels) 3522 .width(3) 3523 .qmax(128) 3524 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3525 } 3526 } 3527 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,input_offset)3528 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, input_offset) { 3529 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3530 for (uint32_t channels = 64; channels < 512; channels += 96) { 3531 DWConvMicrokernelTester() 3532 .cr(32) 3533 .kr(4) 3534 .channels(channels) 3535 .input_offset(592) 3536 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3537 } 3538 } 3539 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH,zero)3540 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH, zero) { 3541 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3542 for (uint32_t mz = 0; mz < 4; mz++) { 3543 for (uint32_t channels = 64; channels < 512; channels += 96) { 3544 DWConvMicrokernelTester() 3545 .cr(32) 3546 .kr(4) 3547 .channels(channels) 3548 .input_offset(592) 3549 .zero_index(mz) 3550 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith, xnn_init_f16_minmax_neon_params); 3551 } 3552 } 3553 } 3554 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 3555 3556 3557 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2,c_eq_32)3558 
// Tests for xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2; every
// case configures the tester with cr(32) and kr(4).
TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel count equal to cr.
  DWConvMicrokernelTester().cr(32).kr(4).channels(32)
    .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts 64, 160, 256, 352, 448: all multiples of 32.
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same multiple-of-32 sweep, with qmin(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same multiple-of-32 sweep, with qmax(128).
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Every channel count in [1, 31], i.e. below cr.
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Every channel count in [33, 63], i.e. above cr but below 2 * cr.
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts in [33, 63], with qmin(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Channel counts in [33, 63], with qmax(128).
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(3) with channel counts 1, 32, 63, 94, 125, 156.
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(4).channels(c).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params);
  }
}

TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2,multipixel_with_step)3660 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_step) { 3661 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3662 for (size_t channels = 1; channels <= 160; channels += 31) { 3663 for (size_t step = 2; step <= 4; step++) { 3664 DWConvMicrokernelTester() 3665 .cr(32) 3666 .kr(4) 3667 .channels(channels) 3668 .width(3) 3669 .step(step) 3670 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3671 } 3672 } 3673 } 3674 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2,multipixel_with_output_stride)3675 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 3676 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3677 for (size_t channels = 1; channels <= 160; channels += 31) { 3678 DWConvMicrokernelTester() 3679 .cr(32) 3680 .kr(4) 3681 .channels(32) 3682 .width(5) 3683 .output_stride(163) 3684 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3685 } 3686 } 3687 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2,multipixel_with_qmin)3688 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 3689 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3690 for (size_t channels = 1; channels <= 160; channels += 31) { 3691 DWConvMicrokernelTester() 3692 .cr(32) 3693 .kr(4) 3694 .channels(channels) 3695 .width(3) 3696 .qmin(128) 3697 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3698 } 3699 } 3700 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2,multipixel_with_qmax)3701 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 3702 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3703 for (size_t channels = 1; channels <= 160; channels += 31) { 3704 DWConvMicrokernelTester() 3705 .cr(32) 3706 .kr(4) 3707 .channels(channels) 3708 .width(3) 3709 .qmax(128) 3710 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, 
xnn_init_f16_minmax_neon_params); 3711 } 3712 } 3713 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2,input_offset)3714 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, input_offset) { 3715 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3716 for (uint32_t channels = 64; channels < 512; channels += 96) { 3717 DWConvMicrokernelTester() 3718 .cr(32) 3719 .kr(4) 3720 .channels(channels) 3721 .input_offset(592) 3722 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3723 } 3724 } 3725 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2,zero)3726 TEST(F16_DWCONV_MINMAX_UP32X4__NEONFP16ARITH_ACC2, zero) { 3727 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3728 for (uint32_t mz = 0; mz < 4; mz++) { 3729 for (uint32_t channels = 64; channels < 512; channels += 96) { 3730 DWConvMicrokernelTester() 3731 .cr(32) 3732 .kr(4) 3733 .channels(channels) 3734 .input_offset(592) 3735 .zero_index(mz) 3736 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3737 } 3738 } 3739 } 3740 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 3741 3742 3743 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_eq_32)3744 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_eq_32) { 3745 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3746 DWConvMicrokernelTester() 3747 .cr(32) 3748 .kr(9) 3749 .channels(32) 3750 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3751 } 3752 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_div_32)3753 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32) { 3754 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3755 for (uint32_t channels = 64; channels < 512; channels += 96) { 3756 DWConvMicrokernelTester() 3757 .cr(32) 3758 .kr(9) 3759 .channels(channels) 3760 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3761 } 3762 } 3763 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_div_32_with_qmin)3764 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32_with_qmin) { 3765 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3766 for (uint32_t channels = 64; channels < 512; channels += 96) { 3767 DWConvMicrokernelTester() 3768 .cr(32) 3769 .kr(9) 3770 .channels(channels) 3771 .qmin(128) 3772 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3773 } 3774 } 3775 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_div_32_with_qmax)3776 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_div_32_with_qmax) { 3777 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3778 for (uint32_t channels = 64; channels < 512; channels += 96) { 3779 DWConvMicrokernelTester() 3780 .cr(32) 3781 .kr(9) 3782 .channels(channels) 3783 .qmax(128) 3784 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3785 } 3786 } 3787 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_lt_32)3788 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_lt_32) { 3789 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3790 for (uint32_t channels = 1; channels < 32; channels++) { 3791 DWConvMicrokernelTester() 3792 .cr(32) 3793 .kr(9) 3794 .channels(channels) 3795 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3796 } 3797 } 3798 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_gt_32)3799 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32) { 3800 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3801 for (uint32_t channels = 33; channels < 64; channels++) { 3802 DWConvMicrokernelTester() 3803 .cr(32) 3804 .kr(9) 3805 .channels(channels) 3806 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3807 } 3808 } 3809 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_gt_32_with_qmin)3810 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32_with_qmin) { 3811 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3812 for (uint32_t channels = 33; channels < 64; channels++) { 3813 
DWConvMicrokernelTester() 3814 .cr(32) 3815 .kr(9) 3816 .channels(channels) 3817 .qmin(128) 3818 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3819 } 3820 } 3821 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,c_gt_32_with_qmax)3822 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, c_gt_32_with_qmax) { 3823 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3824 for (uint32_t channels = 33; channels < 64; channels++) { 3825 DWConvMicrokernelTester() 3826 .cr(32) 3827 .kr(9) 3828 .channels(channels) 3829 .qmax(128) 3830 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3831 } 3832 } 3833 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,multipixel)3834 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel) { 3835 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3836 for (size_t channels = 1; channels <= 160; channels += 31) { 3837 DWConvMicrokernelTester() 3838 .cr(32) 3839 .kr(9) 3840 .channels(channels) 3841 .width(3) 3842 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3843 } 3844 } 3845 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,multipixel_with_step)3846 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_step) { 3847 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3848 for (size_t channels = 1; channels <= 160; channels += 31) { 3849 for (size_t step = 2; step <= 9; step++) { 3850 DWConvMicrokernelTester() 3851 .cr(32) 3852 .kr(9) 3853 .channels(channels) 3854 .width(3) 3855 .step(step) 3856 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3857 } 3858 } 3859 } 3860 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,multipixel_with_output_stride)3861 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_output_stride) { 3862 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3863 for (size_t channels = 1; channels <= 160; channels += 31) { 3864 DWConvMicrokernelTester() 3865 .cr(32) 3866 .kr(9) 3867 .channels(32) 3868 .width(5) 3869 
.output_stride(163) 3870 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3871 } 3872 } 3873 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,multipixel_with_qmin)3874 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_qmin) { 3875 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3876 for (size_t channels = 1; channels <= 160; channels += 31) { 3877 DWConvMicrokernelTester() 3878 .cr(32) 3879 .kr(9) 3880 .channels(channels) 3881 .width(3) 3882 .qmin(128) 3883 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3884 } 3885 } 3886 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,multipixel_with_qmax)3887 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, multipixel_with_qmax) { 3888 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3889 for (size_t channels = 1; channels <= 160; channels += 31) { 3890 DWConvMicrokernelTester() 3891 .cr(32) 3892 .kr(9) 3893 .channels(channels) 3894 .width(3) 3895 .qmax(128) 3896 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3897 } 3898 } 3899 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,input_offset)3900 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, input_offset) { 3901 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3902 for (uint32_t channels = 64; channels < 512; channels += 96) { 3903 DWConvMicrokernelTester() 3904 .cr(32) 3905 .kr(9) 3906 .channels(channels) 3907 .input_offset(592) 3908 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, xnn_init_f16_minmax_neon_params); 3909 } 3910 } 3911 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH,zero)3912 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH, zero) { 3913 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3914 for (uint32_t mz = 0; mz < 9; mz++) { 3915 for (uint32_t channels = 64; channels < 512; channels += 96) { 3916 DWConvMicrokernelTester() 3917 .cr(32) 3918 .kr(9) 3919 .channels(channels) 3920 .input_offset(592) 3921 .zero_index(mz) 3922 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith, 
xnn_init_f16_minmax_neon_params); 3923 } 3924 } 3925 } 3926 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 3927 3928 3929 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_eq_32)3930 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_eq_32) { 3931 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3932 DWConvMicrokernelTester() 3933 .cr(32) 3934 .kr(9) 3935 .channels(32) 3936 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3937 } 3938 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_div_32)3939 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32) { 3940 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3941 for (uint32_t channels = 64; channels < 512; channels += 96) { 3942 DWConvMicrokernelTester() 3943 .cr(32) 3944 .kr(9) 3945 .channels(channels) 3946 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3947 } 3948 } 3949 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_div_32_with_qmin)3950 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32_with_qmin) { 3951 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3952 for (uint32_t channels = 64; channels < 512; channels += 96) { 3953 DWConvMicrokernelTester() 3954 .cr(32) 3955 .kr(9) 3956 .channels(channels) 3957 .qmin(128) 3958 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3959 } 3960 } 3961 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_div_32_with_qmax)3962 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_div_32_with_qmax) { 3963 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3964 for (uint32_t channels = 64; channels < 512; channels += 96) { 3965 DWConvMicrokernelTester() 3966 .cr(32) 3967 .kr(9) 3968 .channels(channels) 3969 .qmax(128) 3970 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3971 } 3972 } 3973 
TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_lt_32)3974 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_lt_32) { 3975 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3976 for (uint32_t channels = 1; channels < 32; channels++) { 3977 DWConvMicrokernelTester() 3978 .cr(32) 3979 .kr(9) 3980 .channels(channels) 3981 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3982 } 3983 } 3984 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_gt_32)3985 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32) { 3986 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3987 for (uint32_t channels = 33; channels < 64; channels++) { 3988 DWConvMicrokernelTester() 3989 .cr(32) 3990 .kr(9) 3991 .channels(channels) 3992 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 3993 } 3994 } 3995 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_gt_32_with_qmin)3996 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) { 3997 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 3998 for (uint32_t channels = 33; channels < 64; channels++) { 3999 DWConvMicrokernelTester() 4000 .cr(32) 4001 .kr(9) 4002 .channels(channels) 4003 .qmin(128) 4004 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4005 } 4006 } 4007 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,c_gt_32_with_qmax)4008 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) { 4009 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4010 for (uint32_t channels = 33; channels < 64; channels++) { 4011 DWConvMicrokernelTester() 4012 .cr(32) 4013 .kr(9) 4014 .channels(channels) 4015 .qmax(128) 4016 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4017 } 4018 } 4019 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,multipixel)4020 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel) { 4021 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4022 for (size_t channels = 1; 
channels <= 160; channels += 31) { 4023 DWConvMicrokernelTester() 4024 .cr(32) 4025 .kr(9) 4026 .channels(channels) 4027 .width(3) 4028 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4029 } 4030 } 4031 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,multipixel_with_step)4032 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_step) { 4033 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4034 for (size_t channels = 1; channels <= 160; channels += 31) { 4035 for (size_t step = 2; step <= 9; step++) { 4036 DWConvMicrokernelTester() 4037 .cr(32) 4038 .kr(9) 4039 .channels(channels) 4040 .width(3) 4041 .step(step) 4042 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4043 } 4044 } 4045 } 4046 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,multipixel_with_output_stride)4047 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 4048 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4049 for (size_t channels = 1; channels <= 160; channels += 31) { 4050 DWConvMicrokernelTester() 4051 .cr(32) 4052 .kr(9) 4053 .channels(32) 4054 .width(5) 4055 .output_stride(163) 4056 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4057 } 4058 } 4059 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,multipixel_with_qmin)4060 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 4061 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4062 for (size_t channels = 1; channels <= 160; channels += 31) { 4063 DWConvMicrokernelTester() 4064 .cr(32) 4065 .kr(9) 4066 .channels(channels) 4067 .width(3) 4068 .qmin(128) 4069 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4070 } 4071 } 4072 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,multipixel_with_qmax)4073 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 4074 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4075 
for (size_t channels = 1; channels <= 160; channels += 31) { 4076 DWConvMicrokernelTester() 4077 .cr(32) 4078 .kr(9) 4079 .channels(channels) 4080 .width(3) 4081 .qmax(128) 4082 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4083 } 4084 } 4085 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,input_offset)4086 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, input_offset) { 4087 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4088 for (uint32_t channels = 64; channels < 512; channels += 96) { 4089 DWConvMicrokernelTester() 4090 .cr(32) 4091 .kr(9) 4092 .channels(channels) 4093 .input_offset(592) 4094 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4095 } 4096 } 4097 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2,zero)4098 TEST(F16_DWCONV_MINMAX_UP32X9__NEONFP16ARITH_ACC2, zero) { 4099 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4100 for (uint32_t mz = 0; mz < 9; mz++) { 4101 for (uint32_t channels = 64; channels < 512; channels += 96) { 4102 DWConvMicrokernelTester() 4103 .cr(32) 4104 .kr(9) 4105 .channels(channels) 4106 .input_offset(592) 4107 .zero_index(mz) 4108 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4109 } 4110 } 4111 } 4112 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 4113 4114 4115 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,c_eq_32)4116 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_eq_32) { 4117 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4118 DWConvMicrokernelTester() 4119 .cr(32) 4120 .kr(25) 4121 .channels(32) 4122 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4123 } 4124 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,c_div_32)4125 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32) { 4126 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4127 for (uint32_t channels = 64; channels < 512; channels += 96) 
{ 4128 DWConvMicrokernelTester() 4129 .cr(32) 4130 .kr(25) 4131 .channels(channels) 4132 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4133 } 4134 } 4135 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,c_div_32_with_qmin)4136 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32_with_qmin) { 4137 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4138 for (uint32_t channels = 64; channels < 512; channels += 96) { 4139 DWConvMicrokernelTester() 4140 .cr(32) 4141 .kr(25) 4142 .channels(channels) 4143 .qmin(128) 4144 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4145 } 4146 } 4147 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,c_div_32_with_qmax)4148 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_div_32_with_qmax) { 4149 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4150 for (uint32_t channels = 64; channels < 512; channels += 96) { 4151 DWConvMicrokernelTester() 4152 .cr(32) 4153 .kr(25) 4154 .channels(channels) 4155 .qmax(128) 4156 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4157 } 4158 } 4159 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,c_lt_32)4160 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_lt_32) { 4161 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4162 for (uint32_t channels = 1; channels < 32; channels++) { 4163 DWConvMicrokernelTester() 4164 .cr(32) 4165 .kr(25) 4166 .channels(channels) 4167 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4168 } 4169 } 4170 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,c_gt_32)4171 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32) { 4172 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4173 for (uint32_t channels = 33; channels < 64; channels++) { 4174 DWConvMicrokernelTester() 4175 .cr(32) 4176 .kr(25) 4177 .channels(channels) 4178 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4179 } 4180 } 4181 
// Channel counts 33..63 (strictly between cr and 2*cr), with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t channels = 33; channels < 64; channels++) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}

// Channel counts 33..63 (strictly between cr and 2*cr), with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t channels = 33; channels < 64; channels++) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}

// Width of 3 with channel counts 1, 32, 63, 94, 125, 156.
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
  }
}

// Width of 3 with channel counts 1, 32, ..., 156 and step values 2..kr.
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(25)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params);
    }
  }
}
TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_output_stride) { 4234 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4235 for (size_t channels = 1; channels <= 160; channels += 31) { 4236 DWConvMicrokernelTester() 4237 .cr(32) 4238 .kr(25) 4239 .channels(32) 4240 .width(5) 4241 .output_stride(163) 4242 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4243 } 4244 } 4245 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,multipixel_with_qmin)4246 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_qmin) { 4247 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4248 for (size_t channels = 1; channels <= 160; channels += 31) { 4249 DWConvMicrokernelTester() 4250 .cr(32) 4251 .kr(25) 4252 .channels(channels) 4253 .width(3) 4254 .qmin(128) 4255 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4256 } 4257 } 4258 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,multipixel_with_qmax)4259 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, multipixel_with_qmax) { 4260 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4261 for (size_t channels = 1; channels <= 160; channels += 31) { 4262 DWConvMicrokernelTester() 4263 .cr(32) 4264 .kr(25) 4265 .channels(channels) 4266 .width(3) 4267 .qmax(128) 4268 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4269 } 4270 } 4271 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,input_offset)4272 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, input_offset) { 4273 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4274 for (uint32_t channels = 64; channels < 512; channels += 96) { 4275 DWConvMicrokernelTester() 4276 .cr(32) 4277 .kr(25) 4278 .channels(channels) 4279 .input_offset(592) 4280 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4281 } 4282 } 4283 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH,zero)4284 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH, zero) { 4285 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4286 for 
(uint32_t mz = 0; mz < 25; mz++) { 4287 for (uint32_t channels = 64; channels < 512; channels += 96) { 4288 DWConvMicrokernelTester() 4289 .cr(32) 4290 .kr(25) 4291 .channels(channels) 4292 .input_offset(592) 4293 .zero_index(mz) 4294 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith, xnn_init_f16_minmax_neon_params); 4295 } 4296 } 4297 } 4298 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 4299 4300 4301 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_eq_32)4302 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_eq_32) { 4303 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4304 DWConvMicrokernelTester() 4305 .cr(32) 4306 .kr(25) 4307 .channels(32) 4308 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4309 } 4310 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_div_32)4311 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32) { 4312 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4313 for (uint32_t channels = 64; channels < 512; channels += 96) { 4314 DWConvMicrokernelTester() 4315 .cr(32) 4316 .kr(25) 4317 .channels(channels) 4318 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4319 } 4320 } 4321 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_div_32_with_qmin)4322 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32_with_qmin) { 4323 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4324 for (uint32_t channels = 64; channels < 512; channels += 96) { 4325 DWConvMicrokernelTester() 4326 .cr(32) 4327 .kr(25) 4328 .channels(channels) 4329 .qmin(128) 4330 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4331 } 4332 } 4333 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_div_32_with_qmax)4334 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_div_32_with_qmax) { 4335 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4336 for (uint32_t channels = 64; 
channels < 512; channels += 96) { 4337 DWConvMicrokernelTester() 4338 .cr(32) 4339 .kr(25) 4340 .channels(channels) 4341 .qmax(128) 4342 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4343 } 4344 } 4345 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_lt_32)4346 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_lt_32) { 4347 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4348 for (uint32_t channels = 1; channels < 32; channels++) { 4349 DWConvMicrokernelTester() 4350 .cr(32) 4351 .kr(25) 4352 .channels(channels) 4353 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4354 } 4355 } 4356 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_gt_32)4357 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32) { 4358 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4359 for (uint32_t channels = 33; channels < 64; channels++) { 4360 DWConvMicrokernelTester() 4361 .cr(32) 4362 .kr(25) 4363 .channels(channels) 4364 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4365 } 4366 } 4367 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_gt_32_with_qmin)4368 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32_with_qmin) { 4369 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4370 for (uint32_t channels = 33; channels < 64; channels++) { 4371 DWConvMicrokernelTester() 4372 .cr(32) 4373 .kr(25) 4374 .channels(channels) 4375 .qmin(128) 4376 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4377 } 4378 } 4379 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,c_gt_32_with_qmax)4380 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, c_gt_32_with_qmax) { 4381 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4382 for (uint32_t channels = 33; channels < 64; channels++) { 4383 DWConvMicrokernelTester() 4384 .cr(32) 4385 .kr(25) 4386 .channels(channels) 4387 .qmax(128) 4388 
.Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4389 } 4390 } 4391 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,multipixel)4392 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel) { 4393 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4394 for (size_t channels = 1; channels <= 160; channels += 31) { 4395 DWConvMicrokernelTester() 4396 .cr(32) 4397 .kr(25) 4398 .channels(channels) 4399 .width(3) 4400 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4401 } 4402 } 4403 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,multipixel_with_step)4404 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_step) { 4405 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4406 for (size_t channels = 1; channels <= 160; channels += 31) { 4407 for (size_t step = 2; step <= 25; step++) { 4408 DWConvMicrokernelTester() 4409 .cr(32) 4410 .kr(25) 4411 .channels(channels) 4412 .width(3) 4413 .step(step) 4414 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4415 } 4416 } 4417 } 4418 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,multipixel_with_output_stride)4419 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 4420 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4421 for (size_t channels = 1; channels <= 160; channels += 31) { 4422 DWConvMicrokernelTester() 4423 .cr(32) 4424 .kr(25) 4425 .channels(32) 4426 .width(5) 4427 .output_stride(163) 4428 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4429 } 4430 } 4431 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,multipixel_with_qmin)4432 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 4433 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4434 for (size_t channels = 1; channels <= 160; channels += 31) { 4435 DWConvMicrokernelTester() 4436 .cr(32) 4437 .kr(25) 4438 .channels(channels) 4439 
.width(3) 4440 .qmin(128) 4441 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4442 } 4443 } 4444 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,multipixel_with_qmax)4445 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 4446 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4447 for (size_t channels = 1; channels <= 160; channels += 31) { 4448 DWConvMicrokernelTester() 4449 .cr(32) 4450 .kr(25) 4451 .channels(channels) 4452 .width(3) 4453 .qmax(128) 4454 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4455 } 4456 } 4457 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,input_offset)4458 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, input_offset) { 4459 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4460 for (uint32_t channels = 64; channels < 512; channels += 96) { 4461 DWConvMicrokernelTester() 4462 .cr(32) 4463 .kr(25) 4464 .channels(channels) 4465 .input_offset(592) 4466 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4467 } 4468 } 4469 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2,zero)4470 TEST(F16_DWCONV_MINMAX_UP32X25__NEONFP16ARITH_ACC2, zero) { 4471 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 4472 for (uint32_t mz = 0; mz < 25; mz++) { 4473 for (uint32_t channels = 64; channels < 512; channels += 96) { 4474 DWConvMicrokernelTester() 4475 .cr(32) 4476 .kr(25) 4477 .channels(channels) 4478 .input_offset(592) 4479 .zero_index(mz) 4480 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__neonfp16arith_acc2, xnn_init_f16_minmax_neon_params); 4481 } 4482 } 4483 } 4484 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 4485 4486 4487 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_eq_8)4488 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_eq_8) { 4489 TEST_REQUIRES_X86_FMA3; 4490 DWConvMicrokernelTester() 4491 .cr(8) 4492 .kr(3) 4493 .channels(8) 4494 
.Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4495 } 4496 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_div_8)4497 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_div_8) { 4498 TEST_REQUIRES_X86_FMA3; 4499 for (uint32_t channels = 16; channels < 128; channels += 24) { 4500 DWConvMicrokernelTester() 4501 .cr(8) 4502 .kr(3) 4503 .channels(channels) 4504 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4505 } 4506 } 4507 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_div_8_with_qmin)4508 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_div_8_with_qmin) { 4509 TEST_REQUIRES_X86_FMA3; 4510 for (uint32_t channels = 16; channels < 128; channels += 24) { 4511 DWConvMicrokernelTester() 4512 .cr(8) 4513 .kr(3) 4514 .channels(channels) 4515 .qmin(128) 4516 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4517 } 4518 } 4519 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_div_8_with_qmax)4520 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_div_8_with_qmax) { 4521 TEST_REQUIRES_X86_FMA3; 4522 for (uint32_t channels = 16; channels < 128; channels += 24) { 4523 DWConvMicrokernelTester() 4524 .cr(8) 4525 .kr(3) 4526 .channels(channels) 4527 .qmax(128) 4528 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4529 } 4530 } 4531 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_lt_8)4532 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_lt_8) { 4533 TEST_REQUIRES_X86_FMA3; 4534 for (uint32_t channels = 1; channels < 8; channels++) { 4535 DWConvMicrokernelTester() 4536 .cr(8) 4537 .kr(3) 4538 .channels(channels) 4539 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4540 } 4541 } 4542 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_gt_8)4543 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_gt_8) { 4544 TEST_REQUIRES_X86_FMA3; 4545 for (uint32_t channels = 9; channels < 16; channels++) { 4546 DWConvMicrokernelTester() 4547 .cr(8) 4548 .kr(3) 4549 .channels(channels) 4550 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, 
xnn_init_f16_minmax_avx_params); 4551 } 4552 } 4553 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_gt_8_with_qmin)4554 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_gt_8_with_qmin) { 4555 TEST_REQUIRES_X86_FMA3; 4556 for (uint32_t channels = 9; channels < 16; channels++) { 4557 DWConvMicrokernelTester() 4558 .cr(8) 4559 .kr(3) 4560 .channels(channels) 4561 .qmin(128) 4562 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4563 } 4564 } 4565 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,c_gt_8_with_qmax)4566 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, c_gt_8_with_qmax) { 4567 TEST_REQUIRES_X86_FMA3; 4568 for (uint32_t channels = 9; channels < 16; channels++) { 4569 DWConvMicrokernelTester() 4570 .cr(8) 4571 .kr(3) 4572 .channels(channels) 4573 .qmax(128) 4574 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4575 } 4576 } 4577 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,multipixel)4578 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel) { 4579 TEST_REQUIRES_X86_FMA3; 4580 for (size_t channels = 1; channels <= 40; channels += 7) { 4581 DWConvMicrokernelTester() 4582 .cr(8) 4583 .kr(3) 4584 .channels(channels) 4585 .width(3) 4586 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4587 } 4588 } 4589 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,multipixel_with_step)4590 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_step) { 4591 TEST_REQUIRES_X86_FMA3; 4592 for (size_t channels = 1; channels <= 40; channels += 7) { 4593 for (size_t step = 2; step <= 3; step++) { 4594 DWConvMicrokernelTester() 4595 .cr(8) 4596 .kr(3) 4597 .channels(channels) 4598 .width(3) 4599 .step(step) 4600 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4601 } 4602 } 4603 } 4604 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,multipixel_with_output_stride)4605 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_output_stride) { 4606 TEST_REQUIRES_X86_FMA3; 4607 for (size_t channels = 1; channels <= 40; channels += 7) { 4608 
DWConvMicrokernelTester() 4609 .cr(8) 4610 .kr(3) 4611 .channels(8) 4612 .width(5) 4613 .output_stride(43) 4614 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4615 } 4616 } 4617 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,multipixel_with_qmin)4618 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_qmin) { 4619 TEST_REQUIRES_X86_FMA3; 4620 for (size_t channels = 1; channels <= 40; channels += 7) { 4621 DWConvMicrokernelTester() 4622 .cr(8) 4623 .kr(3) 4624 .channels(channels) 4625 .width(3) 4626 .qmin(128) 4627 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4628 } 4629 } 4630 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,multipixel_with_qmax)4631 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, multipixel_with_qmax) { 4632 TEST_REQUIRES_X86_FMA3; 4633 for (size_t channels = 1; channels <= 40; channels += 7) { 4634 DWConvMicrokernelTester() 4635 .cr(8) 4636 .kr(3) 4637 .channels(channels) 4638 .width(3) 4639 .qmax(128) 4640 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4641 } 4642 } 4643 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,input_offset)4644 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, input_offset) { 4645 TEST_REQUIRES_X86_FMA3; 4646 for (uint32_t channels = 16; channels < 128; channels += 24) { 4647 DWConvMicrokernelTester() 4648 .cr(8) 4649 .kr(3) 4650 .channels(channels) 4651 .input_offset(176) 4652 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4653 } 4654 } 4655 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3,zero)4656 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3, zero) { 4657 TEST_REQUIRES_X86_FMA3; 4658 for (uint32_t mz = 0; mz < 3; mz++) { 4659 for (uint32_t channels = 16; channels < 128; channels += 24) { 4660 DWConvMicrokernelTester() 4661 .cr(8) 4662 .kr(3) 4663 .channels(channels) 4664 .input_offset(176) 4665 .zero_index(mz) 4666 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3, xnn_init_f16_minmax_avx_params); 4667 } 4668 } 4669 } 4670 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 
4671 4672 4673 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2,c_eq_8)4674 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_eq_8) { 4675 TEST_REQUIRES_X86_FMA3; 4676 DWConvMicrokernelTester() 4677 .cr(8) 4678 .kr(3) 4679 .channels(8) 4680 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 4681 } 4682 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2,c_div_8)4683 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_div_8) { 4684 TEST_REQUIRES_X86_FMA3; 4685 for (uint32_t channels = 16; channels < 128; channels += 24) { 4686 DWConvMicrokernelTester() 4687 .cr(8) 4688 .kr(3) 4689 .channels(channels) 4690 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 4691 } 4692 } 4693 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2,c_div_8_with_qmin)4694 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_div_8_with_qmin) { 4695 TEST_REQUIRES_X86_FMA3; 4696 for (uint32_t channels = 16; channels < 128; channels += 24) { 4697 DWConvMicrokernelTester() 4698 .cr(8) 4699 .kr(3) 4700 .channels(channels) 4701 .qmin(128) 4702 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 4703 } 4704 } 4705 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2,c_div_8_with_qmax)4706 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_div_8_with_qmax) { 4707 TEST_REQUIRES_X86_FMA3; 4708 for (uint32_t channels = 16; channels < 128; channels += 24) { 4709 DWConvMicrokernelTester() 4710 .cr(8) 4711 .kr(3) 4712 .channels(channels) 4713 .qmax(128) 4714 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 4715 } 4716 } 4717 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2,c_lt_8)4718 TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_lt_8) { 4719 TEST_REQUIRES_X86_FMA3; 4720 for (uint32_t channels = 1; channels < 8; channels++) { 4721 DWConvMicrokernelTester() 4722 .cr(8) 4723 .kr(3) 4724 .channels(channels) 4725 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 4726 } 4727 } 4728 
// F16 DWCONV MINMAX up8x3 FMA3 (acc2 kernel): c_gt_8 and multi-pixel cases.
// Each case configures DWConvMicrokernelTester with cr(8) and kr(3) and runs
// xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2 with the AVX min/max params.
// NOTE(review): qmin()/qmax() presumably narrow the output clamping range --
// confirm in dwconv-microkernel-tester.h.

// channels = 9..15.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 9..15 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 9..15 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with channels = 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with every step in 2..3 for each channel count.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 3; ++s) {
      DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).step(s)
          .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}

// width(5) with output_stride(43).
// Fix: the loop variable is now forwarded to channels(). The previous body
// passed the literal channels(8), so all six iterations ran the identical
// configuration. The largest count visited (36) stays below the stride of 43.
// NOTE(review): the file header marks this file as generated by
// tools/generate-dwconv-test.py; mirror the change in the generator template
// or it will be lost on regeneration.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(5).output_stride(43)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// input_offset(176) with channels = 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(3).channels(c).input_offset(176)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
TEST(F16_DWCONV_MINMAX_UP8X3__FMA3_ACC2, zero) { 4843 TEST_REQUIRES_X86_FMA3; 4844 for (uint32_t mz = 0; mz < 3; mz++) { 4845 for (uint32_t channels = 16; channels < 128; channels += 24) { 4846 DWConvMicrokernelTester() 4847 .cr(8) 4848 .kr(3) 4849 .channels(channels) 4850 .input_offset(176) 4851 .zero_index(mz) 4852 .Test(xnn_f16_dwconv_minmax_ukernel_up8x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 4853 } 4854 } 4855 } 4856 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 4857 4858 4859 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_eq_8)4860 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_eq_8) { 4861 TEST_REQUIRES_X86_FMA3; 4862 DWConvMicrokernelTester() 4863 .cr(8) 4864 .kr(4) 4865 .channels(8) 4866 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4867 } 4868 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_div_8)4869 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8) { 4870 TEST_REQUIRES_X86_FMA3; 4871 for (uint32_t channels = 16; channels < 128; channels += 24) { 4872 DWConvMicrokernelTester() 4873 .cr(8) 4874 .kr(4) 4875 .channels(channels) 4876 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4877 } 4878 } 4879 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_div_8_with_qmin)4880 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8_with_qmin) { 4881 TEST_REQUIRES_X86_FMA3; 4882 for (uint32_t channels = 16; channels < 128; channels += 24) { 4883 DWConvMicrokernelTester() 4884 .cr(8) 4885 .kr(4) 4886 .channels(channels) 4887 .qmin(128) 4888 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4889 } 4890 } 4891 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_div_8_with_qmax)4892 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_div_8_with_qmax) { 4893 TEST_REQUIRES_X86_FMA3; 4894 for (uint32_t channels = 16; channels < 128; channels += 24) { 4895 DWConvMicrokernelTester() 4896 .cr(8) 4897 .kr(4) 4898 .channels(channels) 4899 .qmax(128) 4900 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, 
xnn_init_f16_minmax_avx_params); 4901 } 4902 } 4903 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_lt_8)4904 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_lt_8) { 4905 TEST_REQUIRES_X86_FMA3; 4906 for (uint32_t channels = 1; channels < 8; channels++) { 4907 DWConvMicrokernelTester() 4908 .cr(8) 4909 .kr(4) 4910 .channels(channels) 4911 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4912 } 4913 } 4914 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_gt_8)4915 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8) { 4916 TEST_REQUIRES_X86_FMA3; 4917 for (uint32_t channels = 9; channels < 16; channels++) { 4918 DWConvMicrokernelTester() 4919 .cr(8) 4920 .kr(4) 4921 .channels(channels) 4922 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4923 } 4924 } 4925 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_gt_8_with_qmin)4926 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8_with_qmin) { 4927 TEST_REQUIRES_X86_FMA3; 4928 for (uint32_t channels = 9; channels < 16; channels++) { 4929 DWConvMicrokernelTester() 4930 .cr(8) 4931 .kr(4) 4932 .channels(channels) 4933 .qmin(128) 4934 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4935 } 4936 } 4937 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,c_gt_8_with_qmax)4938 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, c_gt_8_with_qmax) { 4939 TEST_REQUIRES_X86_FMA3; 4940 for (uint32_t channels = 9; channels < 16; channels++) { 4941 DWConvMicrokernelTester() 4942 .cr(8) 4943 .kr(4) 4944 .channels(channels) 4945 .qmax(128) 4946 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4947 } 4948 } 4949 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,multipixel)4950 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel) { 4951 TEST_REQUIRES_X86_FMA3; 4952 for (size_t channels = 1; channels <= 40; channels += 7) { 4953 DWConvMicrokernelTester() 4954 .cr(8) 4955 .kr(4) 4956 .channels(channels) 4957 .width(3) 4958 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4959 } 4960 } 4961 
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,multipixel_with_step)4962 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_step) { 4963 TEST_REQUIRES_X86_FMA3; 4964 for (size_t channels = 1; channels <= 40; channels += 7) { 4965 for (size_t step = 2; step <= 4; step++) { 4966 DWConvMicrokernelTester() 4967 .cr(8) 4968 .kr(4) 4969 .channels(channels) 4970 .width(3) 4971 .step(step) 4972 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4973 } 4974 } 4975 } 4976 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,multipixel_with_output_stride)4977 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_output_stride) { 4978 TEST_REQUIRES_X86_FMA3; 4979 for (size_t channels = 1; channels <= 40; channels += 7) { 4980 DWConvMicrokernelTester() 4981 .cr(8) 4982 .kr(4) 4983 .channels(8) 4984 .width(5) 4985 .output_stride(43) 4986 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 4987 } 4988 } 4989 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,multipixel_with_qmin)4990 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_qmin) { 4991 TEST_REQUIRES_X86_FMA3; 4992 for (size_t channels = 1; channels <= 40; channels += 7) { 4993 DWConvMicrokernelTester() 4994 .cr(8) 4995 .kr(4) 4996 .channels(channels) 4997 .width(3) 4998 .qmin(128) 4999 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 5000 } 5001 } 5002 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,multipixel_with_qmax)5003 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, multipixel_with_qmax) { 5004 TEST_REQUIRES_X86_FMA3; 5005 for (size_t channels = 1; channels <= 40; channels += 7) { 5006 DWConvMicrokernelTester() 5007 .cr(8) 5008 .kr(4) 5009 .channels(channels) 5010 .width(3) 5011 .qmax(128) 5012 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 5013 } 5014 } 5015 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,input_offset)5016 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, input_offset) { 5017 TEST_REQUIRES_X86_FMA3; 5018 for (uint32_t channels = 16; channels < 128; channels += 24) 
{ 5019 DWConvMicrokernelTester() 5020 .cr(8) 5021 .kr(4) 5022 .channels(channels) 5023 .input_offset(176) 5024 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 5025 } 5026 } 5027 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3,zero)5028 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3, zero) { 5029 TEST_REQUIRES_X86_FMA3; 5030 for (uint32_t mz = 0; mz < 4; mz++) { 5031 for (uint32_t channels = 16; channels < 128; channels += 24) { 5032 DWConvMicrokernelTester() 5033 .cr(8) 5034 .kr(4) 5035 .channels(channels) 5036 .input_offset(176) 5037 .zero_index(mz) 5038 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3, xnn_init_f16_minmax_avx_params); 5039 } 5040 } 5041 } 5042 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 5043 5044 5045 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,c_eq_8)5046 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_eq_8) { 5047 TEST_REQUIRES_X86_FMA3; 5048 DWConvMicrokernelTester() 5049 .cr(8) 5050 .kr(4) 5051 .channels(8) 5052 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5053 } 5054 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,c_div_8)5055 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8) { 5056 TEST_REQUIRES_X86_FMA3; 5057 for (uint32_t channels = 16; channels < 128; channels += 24) { 5058 DWConvMicrokernelTester() 5059 .cr(8) 5060 .kr(4) 5061 .channels(channels) 5062 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5063 } 5064 } 5065 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,c_div_8_with_qmin)5066 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8_with_qmin) { 5067 TEST_REQUIRES_X86_FMA3; 5068 for (uint32_t channels = 16; channels < 128; channels += 24) { 5069 DWConvMicrokernelTester() 5070 .cr(8) 5071 .kr(4) 5072 .channels(channels) 5073 .qmin(128) 5074 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5075 } 5076 } 5077 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,c_div_8_with_qmax)5078 
// F16 DWCONV MINMAX up8x4 FMA3 (acc2 kernel): remaining channel-count sweeps.
// Each case uses cr(8) and kr(4) and runs
// xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2 with the AVX min/max params.
// NOTE(review): qmin()/qmax() presumably narrow the output clamping range --
// confirm in dwconv-microkernel-tester.h.

// channels = 16, 40, 64, 88, 112 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 1..7 (every count below 8).
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 9..15.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 9..15 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 9..15 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,multipixel)5136 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel) { 5137 TEST_REQUIRES_X86_FMA3; 5138 for (size_t channels = 1; channels <= 40; channels += 7) { 5139 DWConvMicrokernelTester() 5140 .cr(8) 5141 .kr(4) 5142 .channels(channels) 5143 .width(3) 5144 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5145 } 5146 } 5147 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,multipixel_with_step)5148 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_step) { 5149 TEST_REQUIRES_X86_FMA3; 5150 for (size_t channels = 1; channels <= 40; channels += 7) { 5151 for (size_t step = 2; step <= 4; step++) { 5152 DWConvMicrokernelTester() 5153 .cr(8) 5154 .kr(4) 5155 .channels(channels) 5156 .width(3) 5157 .step(step) 5158 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5159 } 5160 } 5161 } 5162 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,multipixel_with_output_stride)5163 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_output_stride) { 5164 TEST_REQUIRES_X86_FMA3; 5165 for (size_t channels = 1; channels <= 40; channels += 7) { 5166 DWConvMicrokernelTester() 5167 .cr(8) 5168 .kr(4) 5169 .channels(8) 5170 .width(5) 5171 .output_stride(43) 5172 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5173 } 5174 } 5175 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,multipixel_with_qmin)5176 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_qmin) { 5177 TEST_REQUIRES_X86_FMA3; 5178 for (size_t channels = 1; channels <= 40; channels += 7) { 5179 DWConvMicrokernelTester() 5180 .cr(8) 5181 .kr(4) 5182 .channels(channels) 5183 .width(3) 5184 .qmin(128) 5185 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5186 } 5187 } 5188 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,multipixel_with_qmax)5189 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, multipixel_with_qmax) { 5190 TEST_REQUIRES_X86_FMA3; 5191 for (size_t 
channels = 1; channels <= 40; channels += 7) { 5192 DWConvMicrokernelTester() 5193 .cr(8) 5194 .kr(4) 5195 .channels(channels) 5196 .width(3) 5197 .qmax(128) 5198 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5199 } 5200 } 5201 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,input_offset)5202 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, input_offset) { 5203 TEST_REQUIRES_X86_FMA3; 5204 for (uint32_t channels = 16; channels < 128; channels += 24) { 5205 DWConvMicrokernelTester() 5206 .cr(8) 5207 .kr(4) 5208 .channels(channels) 5209 .input_offset(176) 5210 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5211 } 5212 } 5213 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2,zero)5214 TEST(F16_DWCONV_MINMAX_UP8X4__FMA3_ACC2, zero) { 5215 TEST_REQUIRES_X86_FMA3; 5216 for (uint32_t mz = 0; mz < 4; mz++) { 5217 for (uint32_t channels = 16; channels < 128; channels += 24) { 5218 DWConvMicrokernelTester() 5219 .cr(8) 5220 .kr(4) 5221 .channels(channels) 5222 .input_offset(176) 5223 .zero_index(mz) 5224 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 5225 } 5226 } 5227 } 5228 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 5229 5230 5231 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,c_eq_8)5232 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_eq_8) { 5233 TEST_REQUIRES_X86_FMA3; 5234 DWConvMicrokernelTester() 5235 .cr(8) 5236 .kr(9) 5237 .channels(8) 5238 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5239 } 5240 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,c_div_8)5241 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8) { 5242 TEST_REQUIRES_X86_FMA3; 5243 for (uint32_t channels = 16; channels < 128; channels += 24) { 5244 DWConvMicrokernelTester() 5245 .cr(8) 5246 .kr(9) 5247 .channels(channels) 5248 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5249 } 5250 } 5251 
// F16 DWCONV MINMAX up8x9 FMA3: channel-count sweeps. Each case uses cr(8) and
// kr(9) and runs xnn_f16_dwconv_minmax_ukernel_up8x9__fma3 with the AVX
// min/max params.
// NOTE(review): qmin()/qmax() presumably narrow the output clamping range --
// confirm in dwconv-microkernel-tester.h.

// channels = 16, 40, 64, 88, 112 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 16, 40, 64, 88, 112 with qmax set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 1..7 (every count below 8).
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 9..15.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels = 9..15 with qmin set to 128.
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params);
  }
}
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,c_gt_8_with_qmax)5310 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, c_gt_8_with_qmax) { 5311 TEST_REQUIRES_X86_FMA3; 5312 for (uint32_t channels = 9; channels < 16; channels++) { 5313 DWConvMicrokernelTester() 5314 .cr(8) 5315 .kr(9) 5316 .channels(channels) 5317 .qmax(128) 5318 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5319 } 5320 } 5321 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,multipixel)5322 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel) { 5323 TEST_REQUIRES_X86_FMA3; 5324 for (size_t channels = 1; channels <= 40; channels += 7) { 5325 DWConvMicrokernelTester() 5326 .cr(8) 5327 .kr(9) 5328 .channels(channels) 5329 .width(3) 5330 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5331 } 5332 } 5333 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,multipixel_with_step)5334 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_step) { 5335 TEST_REQUIRES_X86_FMA3; 5336 for (size_t channels = 1; channels <= 40; channels += 7) { 5337 for (size_t step = 2; step <= 9; step++) { 5338 DWConvMicrokernelTester() 5339 .cr(8) 5340 .kr(9) 5341 .channels(channels) 5342 .width(3) 5343 .step(step) 5344 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5345 } 5346 } 5347 } 5348 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,multipixel_with_output_stride)5349 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_output_stride) { 5350 TEST_REQUIRES_X86_FMA3; 5351 for (size_t channels = 1; channels <= 40; channels += 7) { 5352 DWConvMicrokernelTester() 5353 .cr(8) 5354 .kr(9) 5355 .channels(8) 5356 .width(5) 5357 .output_stride(43) 5358 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5359 } 5360 } 5361 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,multipixel_with_qmin)5362 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_qmin) { 5363 TEST_REQUIRES_X86_FMA3; 5364 for (size_t channels = 1; channels <= 40; channels += 7) { 5365 DWConvMicrokernelTester() 5366 .cr(8) 5367 
.kr(9) 5368 .channels(channels) 5369 .width(3) 5370 .qmin(128) 5371 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5372 } 5373 } 5374 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,multipixel_with_qmax)5375 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, multipixel_with_qmax) { 5376 TEST_REQUIRES_X86_FMA3; 5377 for (size_t channels = 1; channels <= 40; channels += 7) { 5378 DWConvMicrokernelTester() 5379 .cr(8) 5380 .kr(9) 5381 .channels(channels) 5382 .width(3) 5383 .qmax(128) 5384 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5385 } 5386 } 5387 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,input_offset)5388 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, input_offset) { 5389 TEST_REQUIRES_X86_FMA3; 5390 for (uint32_t channels = 16; channels < 128; channels += 24) { 5391 DWConvMicrokernelTester() 5392 .cr(8) 5393 .kr(9) 5394 .channels(channels) 5395 .input_offset(176) 5396 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5397 } 5398 } 5399 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3,zero)5400 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3, zero) { 5401 TEST_REQUIRES_X86_FMA3; 5402 for (uint32_t mz = 0; mz < 9; mz++) { 5403 for (uint32_t channels = 16; channels < 128; channels += 24) { 5404 DWConvMicrokernelTester() 5405 .cr(8) 5406 .kr(9) 5407 .channels(channels) 5408 .input_offset(176) 5409 .zero_index(mz) 5410 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3, xnn_init_f16_minmax_avx_params); 5411 } 5412 } 5413 } 5414 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 5415 5416 5417 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_eq_8)5418 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_eq_8) { 5419 TEST_REQUIRES_X86_FMA3; 5420 DWConvMicrokernelTester() 5421 .cr(8) 5422 .kr(9) 5423 .channels(8) 5424 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5425 } 5426 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_div_8)5427 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8) { 5428 
TEST_REQUIRES_X86_FMA3; 5429 for (uint32_t channels = 16; channels < 128; channels += 24) { 5430 DWConvMicrokernelTester() 5431 .cr(8) 5432 .kr(9) 5433 .channels(channels) 5434 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5435 } 5436 } 5437 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_div_8_with_qmin)5438 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8_with_qmin) { 5439 TEST_REQUIRES_X86_FMA3; 5440 for (uint32_t channels = 16; channels < 128; channels += 24) { 5441 DWConvMicrokernelTester() 5442 .cr(8) 5443 .kr(9) 5444 .channels(channels) 5445 .qmin(128) 5446 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5447 } 5448 } 5449 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_div_8_with_qmax)5450 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_div_8_with_qmax) { 5451 TEST_REQUIRES_X86_FMA3; 5452 for (uint32_t channels = 16; channels < 128; channels += 24) { 5453 DWConvMicrokernelTester() 5454 .cr(8) 5455 .kr(9) 5456 .channels(channels) 5457 .qmax(128) 5458 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5459 } 5460 } 5461 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_lt_8)5462 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_lt_8) { 5463 TEST_REQUIRES_X86_FMA3; 5464 for (uint32_t channels = 1; channels < 8; channels++) { 5465 DWConvMicrokernelTester() 5466 .cr(8) 5467 .kr(9) 5468 .channels(channels) 5469 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5470 } 5471 } 5472 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_gt_8)5473 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8) { 5474 TEST_REQUIRES_X86_FMA3; 5475 for (uint32_t channels = 9; channels < 16; channels++) { 5476 DWConvMicrokernelTester() 5477 .cr(8) 5478 .kr(9) 5479 .channels(channels) 5480 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5481 } 5482 } 5483 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_gt_8_with_qmin)5484 
TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8_with_qmin) { 5485 TEST_REQUIRES_X86_FMA3; 5486 for (uint32_t channels = 9; channels < 16; channels++) { 5487 DWConvMicrokernelTester() 5488 .cr(8) 5489 .kr(9) 5490 .channels(channels) 5491 .qmin(128) 5492 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5493 } 5494 } 5495 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,c_gt_8_with_qmax)5496 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, c_gt_8_with_qmax) { 5497 TEST_REQUIRES_X86_FMA3; 5498 for (uint32_t channels = 9; channels < 16; channels++) { 5499 DWConvMicrokernelTester() 5500 .cr(8) 5501 .kr(9) 5502 .channels(channels) 5503 .qmax(128) 5504 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5505 } 5506 } 5507 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,multipixel)5508 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel) { 5509 TEST_REQUIRES_X86_FMA3; 5510 for (size_t channels = 1; channels <= 40; channels += 7) { 5511 DWConvMicrokernelTester() 5512 .cr(8) 5513 .kr(9) 5514 .channels(channels) 5515 .width(3) 5516 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5517 } 5518 } 5519 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,multipixel_with_step)5520 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_step) { 5521 TEST_REQUIRES_X86_FMA3; 5522 for (size_t channels = 1; channels <= 40; channels += 7) { 5523 for (size_t step = 2; step <= 9; step++) { 5524 DWConvMicrokernelTester() 5525 .cr(8) 5526 .kr(9) 5527 .channels(channels) 5528 .width(3) 5529 .step(step) 5530 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5531 } 5532 } 5533 } 5534 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,multipixel_with_output_stride)5535 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_output_stride) { 5536 TEST_REQUIRES_X86_FMA3; 5537 for (size_t channels = 1; channels <= 40; channels += 7) { 5538 DWConvMicrokernelTester() 5539 .cr(8) 5540 .kr(9) 5541 
.channels(8) 5542 .width(5) 5543 .output_stride(43) 5544 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5545 } 5546 } 5547 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,multipixel_with_qmin)5548 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_qmin) { 5549 TEST_REQUIRES_X86_FMA3; 5550 for (size_t channels = 1; channels <= 40; channels += 7) { 5551 DWConvMicrokernelTester() 5552 .cr(8) 5553 .kr(9) 5554 .channels(channels) 5555 .width(3) 5556 .qmin(128) 5557 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5558 } 5559 } 5560 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,multipixel_with_qmax)5561 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, multipixel_with_qmax) { 5562 TEST_REQUIRES_X86_FMA3; 5563 for (size_t channels = 1; channels <= 40; channels += 7) { 5564 DWConvMicrokernelTester() 5565 .cr(8) 5566 .kr(9) 5567 .channels(channels) 5568 .width(3) 5569 .qmax(128) 5570 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5571 } 5572 } 5573 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,input_offset)5574 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, input_offset) { 5575 TEST_REQUIRES_X86_FMA3; 5576 for (uint32_t channels = 16; channels < 128; channels += 24) { 5577 DWConvMicrokernelTester() 5578 .cr(8) 5579 .kr(9) 5580 .channels(channels) 5581 .input_offset(176) 5582 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5583 } 5584 } 5585 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2,zero)5586 TEST(F16_DWCONV_MINMAX_UP8X9__FMA3_ACC2, zero) { 5587 TEST_REQUIRES_X86_FMA3; 5588 for (uint32_t mz = 0; mz < 9; mz++) { 5589 for (uint32_t channels = 16; channels < 128; channels += 24) { 5590 DWConvMicrokernelTester() 5591 .cr(8) 5592 .kr(9) 5593 .channels(channels) 5594 .input_offset(176) 5595 .zero_index(mz) 5596 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 5597 } 5598 } 5599 } 5600 #endif // XNN_ARCH_X86 || 
XNN_ARCH_X86_64 5601 5602 5603 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_eq_8)5604 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_eq_8) { 5605 TEST_REQUIRES_X86_FMA3; 5606 DWConvMicrokernelTester() 5607 .cr(8) 5608 .kr(25) 5609 .channels(8) 5610 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5611 } 5612 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_div_8)5613 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8) { 5614 TEST_REQUIRES_X86_FMA3; 5615 for (uint32_t channels = 16; channels < 128; channels += 24) { 5616 DWConvMicrokernelTester() 5617 .cr(8) 5618 .kr(25) 5619 .channels(channels) 5620 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5621 } 5622 } 5623 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_div_8_with_qmin)5624 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8_with_qmin) { 5625 TEST_REQUIRES_X86_FMA3; 5626 for (uint32_t channels = 16; channels < 128; channels += 24) { 5627 DWConvMicrokernelTester() 5628 .cr(8) 5629 .kr(25) 5630 .channels(channels) 5631 .qmin(128) 5632 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5633 } 5634 } 5635 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_div_8_with_qmax)5636 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_div_8_with_qmax) { 5637 TEST_REQUIRES_X86_FMA3; 5638 for (uint32_t channels = 16; channels < 128; channels += 24) { 5639 DWConvMicrokernelTester() 5640 .cr(8) 5641 .kr(25) 5642 .channels(channels) 5643 .qmax(128) 5644 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5645 } 5646 } 5647 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_lt_8)5648 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_lt_8) { 5649 TEST_REQUIRES_X86_FMA3; 5650 for (uint32_t channels = 1; channels < 8; channels++) { 5651 DWConvMicrokernelTester() 5652 .cr(8) 5653 .kr(25) 5654 .channels(channels) 5655 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5656 } 5657 } 5658 
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_gt_8)5659 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8) { 5660 TEST_REQUIRES_X86_FMA3; 5661 for (uint32_t channels = 9; channels < 16; channels++) { 5662 DWConvMicrokernelTester() 5663 .cr(8) 5664 .kr(25) 5665 .channels(channels) 5666 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5667 } 5668 } 5669 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_gt_8_with_qmin)5670 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8_with_qmin) { 5671 TEST_REQUIRES_X86_FMA3; 5672 for (uint32_t channels = 9; channels < 16; channels++) { 5673 DWConvMicrokernelTester() 5674 .cr(8) 5675 .kr(25) 5676 .channels(channels) 5677 .qmin(128) 5678 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5679 } 5680 } 5681 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,c_gt_8_with_qmax)5682 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, c_gt_8_with_qmax) { 5683 TEST_REQUIRES_X86_FMA3; 5684 for (uint32_t channels = 9; channels < 16; channels++) { 5685 DWConvMicrokernelTester() 5686 .cr(8) 5687 .kr(25) 5688 .channels(channels) 5689 .qmax(128) 5690 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5691 } 5692 } 5693 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,multipixel)5694 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel) { 5695 TEST_REQUIRES_X86_FMA3; 5696 for (size_t channels = 1; channels <= 40; channels += 7) { 5697 DWConvMicrokernelTester() 5698 .cr(8) 5699 .kr(25) 5700 .channels(channels) 5701 .width(3) 5702 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5703 } 5704 } 5705 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,multipixel_with_step)5706 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_step) { 5707 TEST_REQUIRES_X86_FMA3; 5708 for (size_t channels = 1; channels <= 40; channels += 7) { 5709 for (size_t step = 2; step <= 25; step++) { 5710 DWConvMicrokernelTester() 5711 .cr(8) 5712 .kr(25) 5713 .channels(channels) 5714 .width(3) 5715 .step(step) 5716 
.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5717 } 5718 } 5719 } 5720 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,multipixel_with_output_stride)5721 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_output_stride) { 5722 TEST_REQUIRES_X86_FMA3; 5723 for (size_t channels = 1; channels <= 40; channels += 7) { 5724 DWConvMicrokernelTester() 5725 .cr(8) 5726 .kr(25) 5727 .channels(8) 5728 .width(5) 5729 .output_stride(43) 5730 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5731 } 5732 } 5733 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,multipixel_with_qmin)5734 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_qmin) { 5735 TEST_REQUIRES_X86_FMA3; 5736 for (size_t channels = 1; channels <= 40; channels += 7) { 5737 DWConvMicrokernelTester() 5738 .cr(8) 5739 .kr(25) 5740 .channels(channels) 5741 .width(3) 5742 .qmin(128) 5743 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5744 } 5745 } 5746 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,multipixel_with_qmax)5747 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, multipixel_with_qmax) { 5748 TEST_REQUIRES_X86_FMA3; 5749 for (size_t channels = 1; channels <= 40; channels += 7) { 5750 DWConvMicrokernelTester() 5751 .cr(8) 5752 .kr(25) 5753 .channels(channels) 5754 .width(3) 5755 .qmax(128) 5756 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5757 } 5758 } 5759 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,input_offset)5760 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, input_offset) { 5761 TEST_REQUIRES_X86_FMA3; 5762 for (uint32_t channels = 16; channels < 128; channels += 24) { 5763 DWConvMicrokernelTester() 5764 .cr(8) 5765 .kr(25) 5766 .channels(channels) 5767 .input_offset(176) 5768 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5769 } 5770 } 5771 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3,zero)5772 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3, zero) { 5773 TEST_REQUIRES_X86_FMA3; 5774 for (uint32_t mz = 0; mz 
< 25; mz++) { 5775 for (uint32_t channels = 16; channels < 128; channels += 24) { 5776 DWConvMicrokernelTester() 5777 .cr(8) 5778 .kr(25) 5779 .channels(channels) 5780 .input_offset(176) 5781 .zero_index(mz) 5782 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3, xnn_init_f16_minmax_avx_params); 5783 } 5784 } 5785 } 5786 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 5787 5788 5789 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2,c_eq_8)5790 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_eq_8) { 5791 TEST_REQUIRES_X86_FMA3; 5792 DWConvMicrokernelTester() 5793 .cr(8) 5794 .kr(25) 5795 .channels(8) 5796 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 5797 } 5798 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2,c_div_8)5799 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8) { 5800 TEST_REQUIRES_X86_FMA3; 5801 for (uint32_t channels = 16; channels < 128; channels += 24) { 5802 DWConvMicrokernelTester() 5803 .cr(8) 5804 .kr(25) 5805 .channels(channels) 5806 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 5807 } 5808 } 5809 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2,c_div_8_with_qmin)5810 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8_with_qmin) { 5811 TEST_REQUIRES_X86_FMA3; 5812 for (uint32_t channels = 16; channels < 128; channels += 24) { 5813 DWConvMicrokernelTester() 5814 .cr(8) 5815 .kr(25) 5816 .channels(channels) 5817 .qmin(128) 5818 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 5819 } 5820 } 5821 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2,c_div_8_with_qmax)5822 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_div_8_with_qmax) { 5823 TEST_REQUIRES_X86_FMA3; 5824 for (uint32_t channels = 16; channels < 128; channels += 24) { 5825 DWConvMicrokernelTester() 5826 .cr(8) 5827 .kr(25) 5828 .channels(channels) 5829 .qmax(128) 5830 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 5831 } 5832 } 5833 
// Channel counts 1..7, all below cr (8).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t channels = 1; channels < 8; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// Channel counts 9..15 (above cr = 8).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// Channel counts 9..15 (above cr = 8), with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// Channel counts 9..15 (above cr = 8), with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with channel counts 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with channel counts 1, 8, ..., 36 and every step from 2 to kr (25).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
    }
  }
}

// width(5) with output_stride(43) and channel counts 1, 8, ..., 36 (all below
// the stride). The loop variable is now forwarded to channels(); it used to be
// ignored in favour of a constant channels(8), so every iteration was identical.
// NOTE(review): this file is generated - mirror the change in
// tools/generate-dwconv-test.py or it will be lost on regeneration.
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with channel counts 1, 8, ..., 36 and qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(3)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// width(3) with channel counts 1, 8, ..., 36 and qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(3)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, input_offset) { 5947 TEST_REQUIRES_X86_FMA3; 5948 for (uint32_t channels = 16; channels < 128; channels += 24) { 5949 DWConvMicrokernelTester() 5950 .cr(8) 5951 .kr(25) 5952 .channels(channels) 5953 .input_offset(176) 5954 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 5955 } 5956 } 5957 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2,zero)5958 TEST(F16_DWCONV_MINMAX_UP8X25__FMA3_ACC2, zero) { 5959 TEST_REQUIRES_X86_FMA3; 5960 for (uint32_t mz = 0; mz < 25; mz++) { 5961 for (uint32_t channels = 16; channels < 128; channels += 24) { 5962 DWConvMicrokernelTester() 5963 .cr(8) 5964 .kr(25) 5965 .channels(channels) 5966 .input_offset(176) 5967 .zero_index(mz) 5968 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 5969 } 5970 } 5971 } 5972 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 5973 5974 5975 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_eq_16)5976 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_eq_16) { 5977 TEST_REQUIRES_X86_FMA3; 5978 DWConvMicrokernelTester() 5979 .cr(16) 5980 .kr(3) 5981 .channels(16) 5982 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 5983 } 5984 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_div_16)5985 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_div_16) { 5986 TEST_REQUIRES_X86_FMA3; 5987 for (uint32_t channels = 32; channels < 256; channels += 48) { 5988 DWConvMicrokernelTester() 5989 .cr(16) 5990 .kr(3) 5991 .channels(channels) 5992 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 5993 } 5994 } 5995 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_div_16_with_qmin)5996 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_div_16_with_qmin) { 5997 TEST_REQUIRES_X86_FMA3; 5998 for (uint32_t channels = 32; channels < 256; channels += 48) { 5999 DWConvMicrokernelTester() 6000 .cr(16) 6001 .kr(3) 6002 .channels(channels) 6003 .qmin(128) 6004 
.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6005 } 6006 } 6007 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_div_16_with_qmax)6008 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_div_16_with_qmax) { 6009 TEST_REQUIRES_X86_FMA3; 6010 for (uint32_t channels = 32; channels < 256; channels += 48) { 6011 DWConvMicrokernelTester() 6012 .cr(16) 6013 .kr(3) 6014 .channels(channels) 6015 .qmax(128) 6016 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6017 } 6018 } 6019 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_lt_16)6020 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_lt_16) { 6021 TEST_REQUIRES_X86_FMA3; 6022 for (uint32_t channels = 1; channels < 16; channels++) { 6023 DWConvMicrokernelTester() 6024 .cr(16) 6025 .kr(3) 6026 .channels(channels) 6027 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6028 } 6029 } 6030 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_gt_16)6031 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_gt_16) { 6032 TEST_REQUIRES_X86_FMA3; 6033 for (uint32_t channels = 17; channels < 32; channels++) { 6034 DWConvMicrokernelTester() 6035 .cr(16) 6036 .kr(3) 6037 .channels(channels) 6038 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6039 } 6040 } 6041 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_gt_16_with_qmin)6042 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_gt_16_with_qmin) { 6043 TEST_REQUIRES_X86_FMA3; 6044 for (uint32_t channels = 17; channels < 32; channels++) { 6045 DWConvMicrokernelTester() 6046 .cr(16) 6047 .kr(3) 6048 .channels(channels) 6049 .qmin(128) 6050 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6051 } 6052 } 6053 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,c_gt_16_with_qmax)6054 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, c_gt_16_with_qmax) { 6055 TEST_REQUIRES_X86_FMA3; 6056 for (uint32_t channels = 17; channels < 32; channels++) { 6057 DWConvMicrokernelTester() 6058 .cr(16) 6059 .kr(3) 6060 .channels(channels) 6061 .qmax(128) 6062 
.Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6063 } 6064 } 6065 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,multipixel)6066 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel) { 6067 TEST_REQUIRES_X86_FMA3; 6068 for (size_t channels = 1; channels <= 80; channels += 15) { 6069 DWConvMicrokernelTester() 6070 .cr(16) 6071 .kr(3) 6072 .channels(channels) 6073 .width(3) 6074 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6075 } 6076 } 6077 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,multipixel_with_step)6078 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel_with_step) { 6079 TEST_REQUIRES_X86_FMA3; 6080 for (size_t channels = 1; channels <= 80; channels += 15) { 6081 for (size_t step = 2; step <= 3; step++) { 6082 DWConvMicrokernelTester() 6083 .cr(16) 6084 .kr(3) 6085 .channels(channels) 6086 .width(3) 6087 .step(step) 6088 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6089 } 6090 } 6091 } 6092 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,multipixel_with_output_stride)6093 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel_with_output_stride) { 6094 TEST_REQUIRES_X86_FMA3; 6095 for (size_t channels = 1; channels <= 80; channels += 15) { 6096 DWConvMicrokernelTester() 6097 .cr(16) 6098 .kr(3) 6099 .channels(16) 6100 .width(5) 6101 .output_stride(83) 6102 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6103 } 6104 } 6105 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,multipixel_with_qmin)6106 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, multipixel_with_qmin) { 6107 TEST_REQUIRES_X86_FMA3; 6108 for (size_t channels = 1; channels <= 80; channels += 15) { 6109 DWConvMicrokernelTester() 6110 .cr(16) 6111 .kr(3) 6112 .channels(channels) 6113 .width(3) 6114 .qmin(128) 6115 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6116 } 6117 } 6118 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,multipixel_with_qmax)6119 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, 
multipixel_with_qmax) { 6120 TEST_REQUIRES_X86_FMA3; 6121 for (size_t channels = 1; channels <= 80; channels += 15) { 6122 DWConvMicrokernelTester() 6123 .cr(16) 6124 .kr(3) 6125 .channels(channels) 6126 .width(3) 6127 .qmax(128) 6128 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6129 } 6130 } 6131 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,input_offset)6132 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, input_offset) { 6133 TEST_REQUIRES_X86_FMA3; 6134 for (uint32_t channels = 32; channels < 256; channels += 48) { 6135 DWConvMicrokernelTester() 6136 .cr(16) 6137 .kr(3) 6138 .channels(channels) 6139 .input_offset(304) 6140 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6141 } 6142 } 6143 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3,zero)6144 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3, zero) { 6145 TEST_REQUIRES_X86_FMA3; 6146 for (uint32_t mz = 0; mz < 3; mz++) { 6147 for (uint32_t channels = 32; channels < 256; channels += 48) { 6148 DWConvMicrokernelTester() 6149 .cr(16) 6150 .kr(3) 6151 .channels(channels) 6152 .input_offset(304) 6153 .zero_index(mz) 6154 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3, xnn_init_f16_minmax_avx_params); 6155 } 6156 } 6157 } 6158 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 6159 6160 6161 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_eq_16)6162 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_eq_16) { 6163 TEST_REQUIRES_X86_FMA3; 6164 DWConvMicrokernelTester() 6165 .cr(16) 6166 .kr(3) 6167 .channels(16) 6168 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6169 } 6170 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_div_16)6171 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_div_16) { 6172 TEST_REQUIRES_X86_FMA3; 6173 for (uint32_t channels = 32; channels < 256; channels += 48) { 6174 DWConvMicrokernelTester() 6175 .cr(16) 6176 .kr(3) 6177 .channels(channels) 6178 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, 
xnn_init_f16_minmax_avx_params); 6179 } 6180 } 6181 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_div_16_with_qmin)6182 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_div_16_with_qmin) { 6183 TEST_REQUIRES_X86_FMA3; 6184 for (uint32_t channels = 32; channels < 256; channels += 48) { 6185 DWConvMicrokernelTester() 6186 .cr(16) 6187 .kr(3) 6188 .channels(channels) 6189 .qmin(128) 6190 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6191 } 6192 } 6193 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_div_16_with_qmax)6194 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_div_16_with_qmax) { 6195 TEST_REQUIRES_X86_FMA3; 6196 for (uint32_t channels = 32; channels < 256; channels += 48) { 6197 DWConvMicrokernelTester() 6198 .cr(16) 6199 .kr(3) 6200 .channels(channels) 6201 .qmax(128) 6202 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6203 } 6204 } 6205 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_lt_16)6206 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_lt_16) { 6207 TEST_REQUIRES_X86_FMA3; 6208 for (uint32_t channels = 1; channels < 16; channels++) { 6209 DWConvMicrokernelTester() 6210 .cr(16) 6211 .kr(3) 6212 .channels(channels) 6213 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6214 } 6215 } 6216 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_gt_16)6217 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_gt_16) { 6218 TEST_REQUIRES_X86_FMA3; 6219 for (uint32_t channels = 17; channels < 32; channels++) { 6220 DWConvMicrokernelTester() 6221 .cr(16) 6222 .kr(3) 6223 .channels(channels) 6224 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6225 } 6226 } 6227 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_gt_16_with_qmin)6228 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_gt_16_with_qmin) { 6229 TEST_REQUIRES_X86_FMA3; 6230 for (uint32_t channels = 17; channels < 32; channels++) { 6231 DWConvMicrokernelTester() 6232 .cr(16) 6233 .kr(3) 6234 
.channels(channels) 6235 .qmin(128) 6236 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6237 } 6238 } 6239 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,c_gt_16_with_qmax)6240 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, c_gt_16_with_qmax) { 6241 TEST_REQUIRES_X86_FMA3; 6242 for (uint32_t channels = 17; channels < 32; channels++) { 6243 DWConvMicrokernelTester() 6244 .cr(16) 6245 .kr(3) 6246 .channels(channels) 6247 .qmax(128) 6248 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6249 } 6250 } 6251 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,multipixel)6252 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel) { 6253 TEST_REQUIRES_X86_FMA3; 6254 for (size_t channels = 1; channels <= 80; channels += 15) { 6255 DWConvMicrokernelTester() 6256 .cr(16) 6257 .kr(3) 6258 .channels(channels) 6259 .width(3) 6260 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6261 } 6262 } 6263 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,multipixel_with_step)6264 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_step) { 6265 TEST_REQUIRES_X86_FMA3; 6266 for (size_t channels = 1; channels <= 80; channels += 15) { 6267 for (size_t step = 2; step <= 3; step++) { 6268 DWConvMicrokernelTester() 6269 .cr(16) 6270 .kr(3) 6271 .channels(channels) 6272 .width(3) 6273 .step(step) 6274 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6275 } 6276 } 6277 } 6278 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,multipixel_with_output_stride)6279 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_output_stride) { 6280 TEST_REQUIRES_X86_FMA3; 6281 for (size_t channels = 1; channels <= 80; channels += 15) { 6282 DWConvMicrokernelTester() 6283 .cr(16) 6284 .kr(3) 6285 .channels(16) 6286 .width(5) 6287 .output_stride(83) 6288 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6289 } 6290 } 6291 
TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,multipixel_with_qmin)6292 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_qmin) { 6293 TEST_REQUIRES_X86_FMA3; 6294 for (size_t channels = 1; channels <= 80; channels += 15) { 6295 DWConvMicrokernelTester() 6296 .cr(16) 6297 .kr(3) 6298 .channels(channels) 6299 .width(3) 6300 .qmin(128) 6301 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6302 } 6303 } 6304 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,multipixel_with_qmax)6305 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, multipixel_with_qmax) { 6306 TEST_REQUIRES_X86_FMA3; 6307 for (size_t channels = 1; channels <= 80; channels += 15) { 6308 DWConvMicrokernelTester() 6309 .cr(16) 6310 .kr(3) 6311 .channels(channels) 6312 .width(3) 6313 .qmax(128) 6314 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6315 } 6316 } 6317 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,input_offset)6318 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, input_offset) { 6319 TEST_REQUIRES_X86_FMA3; 6320 for (uint32_t channels = 32; channels < 256; channels += 48) { 6321 DWConvMicrokernelTester() 6322 .cr(16) 6323 .kr(3) 6324 .channels(channels) 6325 .input_offset(304) 6326 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6327 } 6328 } 6329 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2,zero)6330 TEST(F16_DWCONV_MINMAX_UP16X3__FMA3_ACC2, zero) { 6331 TEST_REQUIRES_X86_FMA3; 6332 for (uint32_t mz = 0; mz < 3; mz++) { 6333 for (uint32_t channels = 32; channels < 256; channels += 48) { 6334 DWConvMicrokernelTester() 6335 .cr(16) 6336 .kr(3) 6337 .channels(channels) 6338 .input_offset(304) 6339 .zero_index(mz) 6340 .Test(xnn_f16_dwconv_minmax_ukernel_up16x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 6341 } 6342 } 6343 } 6344 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 6345 6346 6347 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_eq_16)6348 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, 
c_eq_16) { 6349 TEST_REQUIRES_X86_FMA3; 6350 DWConvMicrokernelTester() 6351 .cr(16) 6352 .kr(4) 6353 .channels(16) 6354 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6355 } 6356 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_div_16)6357 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16) { 6358 TEST_REQUIRES_X86_FMA3; 6359 for (uint32_t channels = 32; channels < 256; channels += 48) { 6360 DWConvMicrokernelTester() 6361 .cr(16) 6362 .kr(4) 6363 .channels(channels) 6364 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6365 } 6366 } 6367 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_div_16_with_qmin)6368 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16_with_qmin) { 6369 TEST_REQUIRES_X86_FMA3; 6370 for (uint32_t channels = 32; channels < 256; channels += 48) { 6371 DWConvMicrokernelTester() 6372 .cr(16) 6373 .kr(4) 6374 .channels(channels) 6375 .qmin(128) 6376 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6377 } 6378 } 6379 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_div_16_with_qmax)6380 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_div_16_with_qmax) { 6381 TEST_REQUIRES_X86_FMA3; 6382 for (uint32_t channels = 32; channels < 256; channels += 48) { 6383 DWConvMicrokernelTester() 6384 .cr(16) 6385 .kr(4) 6386 .channels(channels) 6387 .qmax(128) 6388 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6389 } 6390 } 6391 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_lt_16)6392 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_lt_16) { 6393 TEST_REQUIRES_X86_FMA3; 6394 for (uint32_t channels = 1; channels < 16; channels++) { 6395 DWConvMicrokernelTester() 6396 .cr(16) 6397 .kr(4) 6398 .channels(channels) 6399 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6400 } 6401 } 6402 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_gt_16)6403 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16) { 6404 TEST_REQUIRES_X86_FMA3; 6405 for (uint32_t channels = 17; channels < 32; 
channels++) { 6406 DWConvMicrokernelTester() 6407 .cr(16) 6408 .kr(4) 6409 .channels(channels) 6410 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6411 } 6412 } 6413 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_gt_16_with_qmin)6414 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16_with_qmin) { 6415 TEST_REQUIRES_X86_FMA3; 6416 for (uint32_t channels = 17; channels < 32; channels++) { 6417 DWConvMicrokernelTester() 6418 .cr(16) 6419 .kr(4) 6420 .channels(channels) 6421 .qmin(128) 6422 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6423 } 6424 } 6425 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,c_gt_16_with_qmax)6426 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, c_gt_16_with_qmax) { 6427 TEST_REQUIRES_X86_FMA3; 6428 for (uint32_t channels = 17; channels < 32; channels++) { 6429 DWConvMicrokernelTester() 6430 .cr(16) 6431 .kr(4) 6432 .channels(channels) 6433 .qmax(128) 6434 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6435 } 6436 } 6437 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,multipixel)6438 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel) { 6439 TEST_REQUIRES_X86_FMA3; 6440 for (size_t channels = 1; channels <= 80; channels += 15) { 6441 DWConvMicrokernelTester() 6442 .cr(16) 6443 .kr(4) 6444 .channels(channels) 6445 .width(3) 6446 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6447 } 6448 } 6449 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,multipixel_with_step)6450 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_step) { 6451 TEST_REQUIRES_X86_FMA3; 6452 for (size_t channels = 1; channels <= 80; channels += 15) { 6453 for (size_t step = 2; step <= 4; step++) { 6454 DWConvMicrokernelTester() 6455 .cr(16) 6456 .kr(4) 6457 .channels(channels) 6458 .width(3) 6459 .step(step) 6460 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6461 } 6462 } 6463 } 6464 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,multipixel_with_output_stride)6465 
TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_output_stride) { 6466 TEST_REQUIRES_X86_FMA3; 6467 for (size_t channels = 1; channels <= 80; channels += 15) { 6468 DWConvMicrokernelTester() 6469 .cr(16) 6470 .kr(4) 6471 .channels(16) 6472 .width(5) 6473 .output_stride(83) 6474 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6475 } 6476 } 6477 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,multipixel_with_qmin)6478 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_qmin) { 6479 TEST_REQUIRES_X86_FMA3; 6480 for (size_t channels = 1; channels <= 80; channels += 15) { 6481 DWConvMicrokernelTester() 6482 .cr(16) 6483 .kr(4) 6484 .channels(channels) 6485 .width(3) 6486 .qmin(128) 6487 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6488 } 6489 } 6490 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,multipixel_with_qmax)6491 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, multipixel_with_qmax) { 6492 TEST_REQUIRES_X86_FMA3; 6493 for (size_t channels = 1; channels <= 80; channels += 15) { 6494 DWConvMicrokernelTester() 6495 .cr(16) 6496 .kr(4) 6497 .channels(channels) 6498 .width(3) 6499 .qmax(128) 6500 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6501 } 6502 } 6503 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,input_offset)6504 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, input_offset) { 6505 TEST_REQUIRES_X86_FMA3; 6506 for (uint32_t channels = 32; channels < 256; channels += 48) { 6507 DWConvMicrokernelTester() 6508 .cr(16) 6509 .kr(4) 6510 .channels(channels) 6511 .input_offset(304) 6512 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6513 } 6514 } 6515 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3,zero)6516 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3, zero) { 6517 TEST_REQUIRES_X86_FMA3; 6518 for (uint32_t mz = 0; mz < 4; mz++) { 6519 for (uint32_t channels = 32; channels < 256; channels += 48) { 6520 DWConvMicrokernelTester() 6521 .cr(16) 6522 .kr(4) 6523 .channels(channels) 6524 
.input_offset(304) 6525 .zero_index(mz) 6526 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3, xnn_init_f16_minmax_avx_params); 6527 } 6528 } 6529 } 6530 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 6531 6532 6533 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_eq_16)6534 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_eq_16) { 6535 TEST_REQUIRES_X86_FMA3; 6536 DWConvMicrokernelTester() 6537 .cr(16) 6538 .kr(4) 6539 .channels(16) 6540 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6541 } 6542 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_div_16)6543 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16) { 6544 TEST_REQUIRES_X86_FMA3; 6545 for (uint32_t channels = 32; channels < 256; channels += 48) { 6546 DWConvMicrokernelTester() 6547 .cr(16) 6548 .kr(4) 6549 .channels(channels) 6550 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6551 } 6552 } 6553 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_div_16_with_qmin)6554 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16_with_qmin) { 6555 TEST_REQUIRES_X86_FMA3; 6556 for (uint32_t channels = 32; channels < 256; channels += 48) { 6557 DWConvMicrokernelTester() 6558 .cr(16) 6559 .kr(4) 6560 .channels(channels) 6561 .qmin(128) 6562 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6563 } 6564 } 6565 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_div_16_with_qmax)6566 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_div_16_with_qmax) { 6567 TEST_REQUIRES_X86_FMA3; 6568 for (uint32_t channels = 32; channels < 256; channels += 48) { 6569 DWConvMicrokernelTester() 6570 .cr(16) 6571 .kr(4) 6572 .channels(channels) 6573 .qmax(128) 6574 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6575 } 6576 } 6577 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_lt_16)6578 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_lt_16) { 6579 TEST_REQUIRES_X86_FMA3; 6580 for (uint32_t channels = 
1; channels < 16; channels++) { 6581 DWConvMicrokernelTester() 6582 .cr(16) 6583 .kr(4) 6584 .channels(channels) 6585 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6586 } 6587 } 6588 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_gt_16)6589 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16) { 6590 TEST_REQUIRES_X86_FMA3; 6591 for (uint32_t channels = 17; channels < 32; channels++) { 6592 DWConvMicrokernelTester() 6593 .cr(16) 6594 .kr(4) 6595 .channels(channels) 6596 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6597 } 6598 } 6599 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_gt_16_with_qmin)6600 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16_with_qmin) { 6601 TEST_REQUIRES_X86_FMA3; 6602 for (uint32_t channels = 17; channels < 32; channels++) { 6603 DWConvMicrokernelTester() 6604 .cr(16) 6605 .kr(4) 6606 .channels(channels) 6607 .qmin(128) 6608 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6609 } 6610 } 6611 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,c_gt_16_with_qmax)6612 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, c_gt_16_with_qmax) { 6613 TEST_REQUIRES_X86_FMA3; 6614 for (uint32_t channels = 17; channels < 32; channels++) { 6615 DWConvMicrokernelTester() 6616 .cr(16) 6617 .kr(4) 6618 .channels(channels) 6619 .qmax(128) 6620 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6621 } 6622 } 6623 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,multipixel)6624 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel) { 6625 TEST_REQUIRES_X86_FMA3; 6626 for (size_t channels = 1; channels <= 80; channels += 15) { 6627 DWConvMicrokernelTester() 6628 .cr(16) 6629 .kr(4) 6630 .channels(channels) 6631 .width(3) 6632 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6633 } 6634 } 6635 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,multipixel_with_step)6636 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, 
multipixel_with_step) { 6637 TEST_REQUIRES_X86_FMA3; 6638 for (size_t channels = 1; channels <= 80; channels += 15) { 6639 for (size_t step = 2; step <= 4; step++) { 6640 DWConvMicrokernelTester() 6641 .cr(16) 6642 .kr(4) 6643 .channels(channels) 6644 .width(3) 6645 .step(step) 6646 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6647 } 6648 } 6649 } 6650 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,multipixel_with_output_stride)6651 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_output_stride) { 6652 TEST_REQUIRES_X86_FMA3; 6653 for (size_t channels = 1; channels <= 80; channels += 15) { 6654 DWConvMicrokernelTester() 6655 .cr(16) 6656 .kr(4) 6657 .channels(16) 6658 .width(5) 6659 .output_stride(83) 6660 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6661 } 6662 } 6663 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,multipixel_with_qmin)6664 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_qmin) { 6665 TEST_REQUIRES_X86_FMA3; 6666 for (size_t channels = 1; channels <= 80; channels += 15) { 6667 DWConvMicrokernelTester() 6668 .cr(16) 6669 .kr(4) 6670 .channels(channels) 6671 .width(3) 6672 .qmin(128) 6673 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6674 } 6675 } 6676 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,multipixel_with_qmax)6677 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, multipixel_with_qmax) { 6678 TEST_REQUIRES_X86_FMA3; 6679 for (size_t channels = 1; channels <= 80; channels += 15) { 6680 DWConvMicrokernelTester() 6681 .cr(16) 6682 .kr(4) 6683 .channels(channels) 6684 .width(3) 6685 .qmax(128) 6686 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6687 } 6688 } 6689 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,input_offset)6690 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, input_offset) { 6691 TEST_REQUIRES_X86_FMA3; 6692 for (uint32_t channels = 32; channels < 256; channels += 48) { 6693 
DWConvMicrokernelTester() 6694 .cr(16) 6695 .kr(4) 6696 .channels(channels) 6697 .input_offset(304) 6698 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6699 } 6700 } 6701 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2,zero)6702 TEST(F16_DWCONV_MINMAX_UP16X4__FMA3_ACC2, zero) { 6703 TEST_REQUIRES_X86_FMA3; 6704 for (uint32_t mz = 0; mz < 4; mz++) { 6705 for (uint32_t channels = 32; channels < 256; channels += 48) { 6706 DWConvMicrokernelTester() 6707 .cr(16) 6708 .kr(4) 6709 .channels(channels) 6710 .input_offset(304) 6711 .zero_index(mz) 6712 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 6713 } 6714 } 6715 } 6716 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 6717 6718 6719 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_eq_16)6720 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_eq_16) { 6721 TEST_REQUIRES_X86_FMA3; 6722 DWConvMicrokernelTester() 6723 .cr(16) 6724 .kr(9) 6725 .channels(16) 6726 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6727 } 6728 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_div_16)6729 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16) { 6730 TEST_REQUIRES_X86_FMA3; 6731 for (uint32_t channels = 32; channels < 256; channels += 48) { 6732 DWConvMicrokernelTester() 6733 .cr(16) 6734 .kr(9) 6735 .channels(channels) 6736 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6737 } 6738 } 6739 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_div_16_with_qmin)6740 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_div_16_with_qmin) { 6741 TEST_REQUIRES_X86_FMA3; 6742 for (uint32_t channels = 32; channels < 256; channels += 48) { 6743 DWConvMicrokernelTester() 6744 .cr(16) 6745 .kr(9) 6746 .channels(channels) 6747 .qmin(128) 6748 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6749 } 6750 } 6751 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_div_16_with_qmax)6752 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, 
c_div_16_with_qmax) { 6753 TEST_REQUIRES_X86_FMA3; 6754 for (uint32_t channels = 32; channels < 256; channels += 48) { 6755 DWConvMicrokernelTester() 6756 .cr(16) 6757 .kr(9) 6758 .channels(channels) 6759 .qmax(128) 6760 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6761 } 6762 } 6763 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_lt_16)6764 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_lt_16) { 6765 TEST_REQUIRES_X86_FMA3; 6766 for (uint32_t channels = 1; channels < 16; channels++) { 6767 DWConvMicrokernelTester() 6768 .cr(16) 6769 .kr(9) 6770 .channels(channels) 6771 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6772 } 6773 } 6774 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_gt_16)6775 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16) { 6776 TEST_REQUIRES_X86_FMA3; 6777 for (uint32_t channels = 17; channels < 32; channels++) { 6778 DWConvMicrokernelTester() 6779 .cr(16) 6780 .kr(9) 6781 .channels(channels) 6782 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6783 } 6784 } 6785 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_gt_16_with_qmin)6786 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16_with_qmin) { 6787 TEST_REQUIRES_X86_FMA3; 6788 for (uint32_t channels = 17; channels < 32; channels++) { 6789 DWConvMicrokernelTester() 6790 .cr(16) 6791 .kr(9) 6792 .channels(channels) 6793 .qmin(128) 6794 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6795 } 6796 } 6797 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,c_gt_16_with_qmax)6798 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, c_gt_16_with_qmax) { 6799 TEST_REQUIRES_X86_FMA3; 6800 for (uint32_t channels = 17; channels < 32; channels++) { 6801 DWConvMicrokernelTester() 6802 .cr(16) 6803 .kr(9) 6804 .channels(channels) 6805 .qmax(128) 6806 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6807 } 6808 } 6809 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,multipixel)6810 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, 
multipixel) { 6811 TEST_REQUIRES_X86_FMA3; 6812 for (size_t channels = 1; channels <= 80; channels += 15) { 6813 DWConvMicrokernelTester() 6814 .cr(16) 6815 .kr(9) 6816 .channels(channels) 6817 .width(3) 6818 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6819 } 6820 } 6821 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,multipixel_with_step)6822 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_step) { 6823 TEST_REQUIRES_X86_FMA3; 6824 for (size_t channels = 1; channels <= 80; channels += 15) { 6825 for (size_t step = 2; step <= 9; step++) { 6826 DWConvMicrokernelTester() 6827 .cr(16) 6828 .kr(9) 6829 .channels(channels) 6830 .width(3) 6831 .step(step) 6832 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6833 } 6834 } 6835 } 6836 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,multipixel_with_output_stride)6837 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_output_stride) { 6838 TEST_REQUIRES_X86_FMA3; 6839 for (size_t channels = 1; channels <= 80; channels += 15) { 6840 DWConvMicrokernelTester() 6841 .cr(16) 6842 .kr(9) 6843 .channels(16) 6844 .width(5) 6845 .output_stride(83) 6846 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6847 } 6848 } 6849 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,multipixel_with_qmin)6850 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_qmin) { 6851 TEST_REQUIRES_X86_FMA3; 6852 for (size_t channels = 1; channels <= 80; channels += 15) { 6853 DWConvMicrokernelTester() 6854 .cr(16) 6855 .kr(9) 6856 .channels(channels) 6857 .width(3) 6858 .qmin(128) 6859 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6860 } 6861 } 6862 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,multipixel_with_qmax)6863 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, multipixel_with_qmax) { 6864 TEST_REQUIRES_X86_FMA3; 6865 for (size_t channels = 1; channels <= 80; channels += 15) { 6866 DWConvMicrokernelTester() 6867 .cr(16) 6868 .kr(9) 6869 .channels(channels) 6870 
.width(3) 6871 .qmax(128) 6872 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6873 } 6874 } 6875 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,input_offset)6876 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, input_offset) { 6877 TEST_REQUIRES_X86_FMA3; 6878 for (uint32_t channels = 32; channels < 256; channels += 48) { 6879 DWConvMicrokernelTester() 6880 .cr(16) 6881 .kr(9) 6882 .channels(channels) 6883 .input_offset(304) 6884 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6885 } 6886 } 6887 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3,zero)6888 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3, zero) { 6889 TEST_REQUIRES_X86_FMA3; 6890 for (uint32_t mz = 0; mz < 9; mz++) { 6891 for (uint32_t channels = 32; channels < 256; channels += 48) { 6892 DWConvMicrokernelTester() 6893 .cr(16) 6894 .kr(9) 6895 .channels(channels) 6896 .input_offset(304) 6897 .zero_index(mz) 6898 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3, xnn_init_f16_minmax_avx_params); 6899 } 6900 } 6901 } 6902 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 6903 6904 6905 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_eq_16)6906 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_eq_16) { 6907 TEST_REQUIRES_X86_FMA3; 6908 DWConvMicrokernelTester() 6909 .cr(16) 6910 .kr(9) 6911 .channels(16) 6912 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6913 } 6914 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_div_16)6915 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16) { 6916 TEST_REQUIRES_X86_FMA3; 6917 for (uint32_t channels = 32; channels < 256; channels += 48) { 6918 DWConvMicrokernelTester() 6919 .cr(16) 6920 .kr(9) 6921 .channels(channels) 6922 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6923 } 6924 } 6925 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_div_16_with_qmin)6926 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16_with_qmin) { 6927 TEST_REQUIRES_X86_FMA3; 6928 for 
(uint32_t channels = 32; channels < 256; channels += 48) { 6929 DWConvMicrokernelTester() 6930 .cr(16) 6931 .kr(9) 6932 .channels(channels) 6933 .qmin(128) 6934 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6935 } 6936 } 6937 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_div_16_with_qmax)6938 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_div_16_with_qmax) { 6939 TEST_REQUIRES_X86_FMA3; 6940 for (uint32_t channels = 32; channels < 256; channels += 48) { 6941 DWConvMicrokernelTester() 6942 .cr(16) 6943 .kr(9) 6944 .channels(channels) 6945 .qmax(128) 6946 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6947 } 6948 } 6949 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_lt_16)6950 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_lt_16) { 6951 TEST_REQUIRES_X86_FMA3; 6952 for (uint32_t channels = 1; channels < 16; channels++) { 6953 DWConvMicrokernelTester() 6954 .cr(16) 6955 .kr(9) 6956 .channels(channels) 6957 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6958 } 6959 } 6960 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_gt_16)6961 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16) { 6962 TEST_REQUIRES_X86_FMA3; 6963 for (uint32_t channels = 17; channels < 32; channels++) { 6964 DWConvMicrokernelTester() 6965 .cr(16) 6966 .kr(9) 6967 .channels(channels) 6968 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6969 } 6970 } 6971 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_gt_16_with_qmin)6972 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16_with_qmin) { 6973 TEST_REQUIRES_X86_FMA3; 6974 for (uint32_t channels = 17; channels < 32; channels++) { 6975 DWConvMicrokernelTester() 6976 .cr(16) 6977 .kr(9) 6978 .channels(channels) 6979 .qmin(128) 6980 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6981 } 6982 } 6983 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,c_gt_16_with_qmax)6984 
TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, c_gt_16_with_qmax) { 6985 TEST_REQUIRES_X86_FMA3; 6986 for (uint32_t channels = 17; channels < 32; channels++) { 6987 DWConvMicrokernelTester() 6988 .cr(16) 6989 .kr(9) 6990 .channels(channels) 6991 .qmax(128) 6992 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 6993 } 6994 } 6995 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,multipixel)6996 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel) { 6997 TEST_REQUIRES_X86_FMA3; 6998 for (size_t channels = 1; channels <= 80; channels += 15) { 6999 DWConvMicrokernelTester() 7000 .cr(16) 7001 .kr(9) 7002 .channels(channels) 7003 .width(3) 7004 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 7005 } 7006 } 7007 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,multipixel_with_step)7008 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_step) { 7009 TEST_REQUIRES_X86_FMA3; 7010 for (size_t channels = 1; channels <= 80; channels += 15) { 7011 for (size_t step = 2; step <= 9; step++) { 7012 DWConvMicrokernelTester() 7013 .cr(16) 7014 .kr(9) 7015 .channels(channels) 7016 .width(3) 7017 .step(step) 7018 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 7019 } 7020 } 7021 } 7022 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,multipixel_with_output_stride)7023 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_output_stride) { 7024 TEST_REQUIRES_X86_FMA3; 7025 for (size_t channels = 1; channels <= 80; channels += 15) { 7026 DWConvMicrokernelTester() 7027 .cr(16) 7028 .kr(9) 7029 .channels(16) 7030 .width(5) 7031 .output_stride(83) 7032 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 7033 } 7034 } 7035 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,multipixel_with_qmin)7036 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_qmin) { 7037 TEST_REQUIRES_X86_FMA3; 7038 for (size_t channels = 1; channels <= 80; channels += 15) { 7039 
DWConvMicrokernelTester() 7040 .cr(16) 7041 .kr(9) 7042 .channels(channels) 7043 .width(3) 7044 .qmin(128) 7045 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 7046 } 7047 } 7048 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,multipixel_with_qmax)7049 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, multipixel_with_qmax) { 7050 TEST_REQUIRES_X86_FMA3; 7051 for (size_t channels = 1; channels <= 80; channels += 15) { 7052 DWConvMicrokernelTester() 7053 .cr(16) 7054 .kr(9) 7055 .channels(channels) 7056 .width(3) 7057 .qmax(128) 7058 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 7059 } 7060 } 7061 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,input_offset)7062 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, input_offset) { 7063 TEST_REQUIRES_X86_FMA3; 7064 for (uint32_t channels = 32; channels < 256; channels += 48) { 7065 DWConvMicrokernelTester() 7066 .cr(16) 7067 .kr(9) 7068 .channels(channels) 7069 .input_offset(304) 7070 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 7071 } 7072 } 7073 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2,zero)7074 TEST(F16_DWCONV_MINMAX_UP16X9__FMA3_ACC2, zero) { 7075 TEST_REQUIRES_X86_FMA3; 7076 for (uint32_t mz = 0; mz < 9; mz++) { 7077 for (uint32_t channels = 32; channels < 256; channels += 48) { 7078 DWConvMicrokernelTester() 7079 .cr(16) 7080 .kr(9) 7081 .channels(channels) 7082 .input_offset(304) 7083 .zero_index(mz) 7084 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 7085 } 7086 } 7087 } 7088 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 7089 7090 7091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3,c_eq_16)7092 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_eq_16) { 7093 TEST_REQUIRES_X86_FMA3; 7094 DWConvMicrokernelTester() 7095 .cr(16) 7096 .kr(25) 7097 .channels(16) 7098 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params); 7099 } 7100 
// Sweeps channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// Same channel sweep as c_div_16, with qmin(128) added to the configuration.
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// Same channel sweep as c_div_16, with qmax(128) added to the configuration.
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// Runs once for each channel count from 1 through 15.
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// Runs once for each channel count from 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// Every channel count in [17, 32) with qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// Every channel count in [17, 32) with qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels in {1, 16, 31, 46, 61, 76} with width(3).
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels in {1, 16, 31, 46, 61, 76} with width(3), for every step in [2, 25].
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_step) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(channels).width(3).step(step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}

// channels in {1, 16, 31, 46, 61, 76} with width(5) and output_stride(83).
// The loop variable is forwarded to channels(); previously channels(16) was
// hard-coded, so all six iterations ran the same configuration. 83 exceeds
// every channel count produced by the loop (max 76).
// NOTE: this file is generated; mirror the change in tools/generate-dwconv-test.py.
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(channels).width(5).output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels in {1, 16, 31, 46, 61, 76} with width(3) and qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels in {1, 16, 31, 46, 61, 76} with width(3) and qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, multipixel_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// channels in {32, 80, 128, 176, 224} with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, input_offset) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t channels = 32; channels < 256; channels += 48) {
    DWConvMicrokernelTester()
      .cr(16).kr(25).channels(channels).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
  }
}

// Every zero_index in [0, 25) combined with input_offset(304) and channels in {32, 80, 128, 176, 224}.
TEST(F16_DWCONV_MINMAX_UP16X25__FMA3, zero) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t mz = 0; mz < 25; mz++) {
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16).kr(25).channels(channels).input_offset(304).zero_index(mz)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3, xnn_init_f16_minmax_avx_params);
    }
  }
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 7275 7276 7277 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_eq_16)7278 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_eq_16) { 7279 TEST_REQUIRES_X86_FMA3; 7280 DWConvMicrokernelTester() 7281 .cr(16) 7282 .kr(25) 7283 .channels(16) 7284 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7285 } 7286 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_div_16)7287 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16) { 7288 TEST_REQUIRES_X86_FMA3; 7289 for (uint32_t channels = 32; channels < 256; channels += 48) { 7290 DWConvMicrokernelTester() 7291 .cr(16) 7292 .kr(25) 7293 .channels(channels) 7294 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7295 } 7296 } 7297 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_div_16_with_qmin)7298 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16_with_qmin) { 7299 TEST_REQUIRES_X86_FMA3; 7300 for (uint32_t channels = 32; channels < 256; channels += 48) { 7301 DWConvMicrokernelTester() 7302 .cr(16) 7303 .kr(25) 7304 .channels(channels) 7305 .qmin(128) 7306 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7307 } 7308 } 7309 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_div_16_with_qmax)7310 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_div_16_with_qmax) { 7311 TEST_REQUIRES_X86_FMA3; 7312 for (uint32_t channels = 32; channels < 256; channels += 48) { 7313 DWConvMicrokernelTester() 7314 .cr(16) 7315 .kr(25) 7316 .channels(channels) 7317 .qmax(128) 7318 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7319 } 7320 } 7321 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_lt_16)7322 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_lt_16) { 7323 TEST_REQUIRES_X86_FMA3; 7324 for (uint32_t channels = 1; channels < 16; channels++) { 7325 DWConvMicrokernelTester() 7326 .cr(16) 7327 .kr(25) 7328 .channels(channels) 7329 
.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7330 } 7331 } 7332 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_gt_16)7333 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16) { 7334 TEST_REQUIRES_X86_FMA3; 7335 for (uint32_t channels = 17; channels < 32; channels++) { 7336 DWConvMicrokernelTester() 7337 .cr(16) 7338 .kr(25) 7339 .channels(channels) 7340 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7341 } 7342 } 7343 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_gt_16_with_qmin)7344 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16_with_qmin) { 7345 TEST_REQUIRES_X86_FMA3; 7346 for (uint32_t channels = 17; channels < 32; channels++) { 7347 DWConvMicrokernelTester() 7348 .cr(16) 7349 .kr(25) 7350 .channels(channels) 7351 .qmin(128) 7352 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7353 } 7354 } 7355 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,c_gt_16_with_qmax)7356 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, c_gt_16_with_qmax) { 7357 TEST_REQUIRES_X86_FMA3; 7358 for (uint32_t channels = 17; channels < 32; channels++) { 7359 DWConvMicrokernelTester() 7360 .cr(16) 7361 .kr(25) 7362 .channels(channels) 7363 .qmax(128) 7364 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7365 } 7366 } 7367 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,multipixel)7368 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel) { 7369 TEST_REQUIRES_X86_FMA3; 7370 for (size_t channels = 1; channels <= 80; channels += 15) { 7371 DWConvMicrokernelTester() 7372 .cr(16) 7373 .kr(25) 7374 .channels(channels) 7375 .width(3) 7376 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7377 } 7378 } 7379 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,multipixel_with_step)7380 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_step) { 7381 TEST_REQUIRES_X86_FMA3; 7382 for (size_t channels = 1; channels <= 80; 
channels += 15) { 7383 for (size_t step = 2; step <= 25; step++) { 7384 DWConvMicrokernelTester() 7385 .cr(16) 7386 .kr(25) 7387 .channels(channels) 7388 .width(3) 7389 .step(step) 7390 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7391 } 7392 } 7393 } 7394 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,multipixel_with_output_stride)7395 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_output_stride) { 7396 TEST_REQUIRES_X86_FMA3; 7397 for (size_t channels = 1; channels <= 80; channels += 15) { 7398 DWConvMicrokernelTester() 7399 .cr(16) 7400 .kr(25) 7401 .channels(16) 7402 .width(5) 7403 .output_stride(83) 7404 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7405 } 7406 } 7407 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,multipixel_with_qmin)7408 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_qmin) { 7409 TEST_REQUIRES_X86_FMA3; 7410 for (size_t channels = 1; channels <= 80; channels += 15) { 7411 DWConvMicrokernelTester() 7412 .cr(16) 7413 .kr(25) 7414 .channels(channels) 7415 .width(3) 7416 .qmin(128) 7417 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7418 } 7419 } 7420 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,multipixel_with_qmax)7421 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, multipixel_with_qmax) { 7422 TEST_REQUIRES_X86_FMA3; 7423 for (size_t channels = 1; channels <= 80; channels += 15) { 7424 DWConvMicrokernelTester() 7425 .cr(16) 7426 .kr(25) 7427 .channels(channels) 7428 .width(3) 7429 .qmax(128) 7430 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7431 } 7432 } 7433 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,input_offset)7434 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, input_offset) { 7435 TEST_REQUIRES_X86_FMA3; 7436 for (uint32_t channels = 32; channels < 256; channels += 48) { 7437 DWConvMicrokernelTester() 7438 .cr(16) 7439 .kr(25) 7440 .channels(channels) 7441 
.input_offset(304) 7442 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7443 } 7444 } 7445 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2,zero)7446 TEST(F16_DWCONV_MINMAX_UP16X25__FMA3_ACC2, zero) { 7447 TEST_REQUIRES_X86_FMA3; 7448 for (uint32_t mz = 0; mz < 25; mz++) { 7449 for (uint32_t channels = 32; channels < 256; channels += 48) { 7450 DWConvMicrokernelTester() 7451 .cr(16) 7452 .kr(25) 7453 .channels(channels) 7454 .input_offset(304) 7455 .zero_index(mz) 7456 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 7457 } 7458 } 7459 } 7460 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 7461 7462 7463 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_eq_32)7464 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_eq_32) { 7465 TEST_REQUIRES_X86_FMA3; 7466 DWConvMicrokernelTester() 7467 .cr(32) 7468 .kr(3) 7469 .channels(32) 7470 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7471 } 7472 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_div_32)7473 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_div_32) { 7474 TEST_REQUIRES_X86_FMA3; 7475 for (uint32_t channels = 64; channels < 512; channels += 96) { 7476 DWConvMicrokernelTester() 7477 .cr(32) 7478 .kr(3) 7479 .channels(channels) 7480 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7481 } 7482 } 7483 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_div_32_with_qmin)7484 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_div_32_with_qmin) { 7485 TEST_REQUIRES_X86_FMA3; 7486 for (uint32_t channels = 64; channels < 512; channels += 96) { 7487 DWConvMicrokernelTester() 7488 .cr(32) 7489 .kr(3) 7490 .channels(channels) 7491 .qmin(128) 7492 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7493 } 7494 } 7495 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_div_32_with_qmax)7496 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_div_32_with_qmax) { 7497 TEST_REQUIRES_X86_FMA3; 7498 for (uint32_t channels = 
64; channels < 512; channels += 96) { 7499 DWConvMicrokernelTester() 7500 .cr(32) 7501 .kr(3) 7502 .channels(channels) 7503 .qmax(128) 7504 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7505 } 7506 } 7507 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_lt_32)7508 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_lt_32) { 7509 TEST_REQUIRES_X86_FMA3; 7510 for (uint32_t channels = 1; channels < 32; channels++) { 7511 DWConvMicrokernelTester() 7512 .cr(32) 7513 .kr(3) 7514 .channels(channels) 7515 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7516 } 7517 } 7518 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_gt_32)7519 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_gt_32) { 7520 TEST_REQUIRES_X86_FMA3; 7521 for (uint32_t channels = 33; channels < 64; channels++) { 7522 DWConvMicrokernelTester() 7523 .cr(32) 7524 .kr(3) 7525 .channels(channels) 7526 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7527 } 7528 } 7529 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_gt_32_with_qmin)7530 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_gt_32_with_qmin) { 7531 TEST_REQUIRES_X86_FMA3; 7532 for (uint32_t channels = 33; channels < 64; channels++) { 7533 DWConvMicrokernelTester() 7534 .cr(32) 7535 .kr(3) 7536 .channels(channels) 7537 .qmin(128) 7538 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7539 } 7540 } 7541 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,c_gt_32_with_qmax)7542 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, c_gt_32_with_qmax) { 7543 TEST_REQUIRES_X86_FMA3; 7544 for (uint32_t channels = 33; channels < 64; channels++) { 7545 DWConvMicrokernelTester() 7546 .cr(32) 7547 .kr(3) 7548 .channels(channels) 7549 .qmax(128) 7550 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7551 } 7552 } 7553 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,multipixel)7554 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel) { 7555 TEST_REQUIRES_X86_FMA3; 7556 for (size_t channels = 1; channels <= 
160; channels += 31) { 7557 DWConvMicrokernelTester() 7558 .cr(32) 7559 .kr(3) 7560 .channels(channels) 7561 .width(3) 7562 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7563 } 7564 } 7565 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,multipixel_with_step)7566 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_step) { 7567 TEST_REQUIRES_X86_FMA3; 7568 for (size_t channels = 1; channels <= 160; channels += 31) { 7569 for (size_t step = 2; step <= 3; step++) { 7570 DWConvMicrokernelTester() 7571 .cr(32) 7572 .kr(3) 7573 .channels(channels) 7574 .width(3) 7575 .step(step) 7576 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7577 } 7578 } 7579 } 7580 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,multipixel_with_output_stride)7581 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_output_stride) { 7582 TEST_REQUIRES_X86_FMA3; 7583 for (size_t channels = 1; channels <= 160; channels += 31) { 7584 DWConvMicrokernelTester() 7585 .cr(32) 7586 .kr(3) 7587 .channels(32) 7588 .width(5) 7589 .output_stride(163) 7590 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7591 } 7592 } 7593 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,multipixel_with_qmin)7594 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_qmin) { 7595 TEST_REQUIRES_X86_FMA3; 7596 for (size_t channels = 1; channels <= 160; channels += 31) { 7597 DWConvMicrokernelTester() 7598 .cr(32) 7599 .kr(3) 7600 .channels(channels) 7601 .width(3) 7602 .qmin(128) 7603 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7604 } 7605 } 7606 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,multipixel_with_qmax)7607 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, multipixel_with_qmax) { 7608 TEST_REQUIRES_X86_FMA3; 7609 for (size_t channels = 1; channels <= 160; channels += 31) { 7610 DWConvMicrokernelTester() 7611 .cr(32) 7612 .kr(3) 7613 .channels(channels) 7614 .width(3) 7615 .qmax(128) 7616 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, 
xnn_init_f16_minmax_avx_params); 7617 } 7618 } 7619 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,input_offset)7620 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, input_offset) { 7621 TEST_REQUIRES_X86_FMA3; 7622 for (uint32_t channels = 64; channels < 512; channels += 96) { 7623 DWConvMicrokernelTester() 7624 .cr(32) 7625 .kr(3) 7626 .channels(channels) 7627 .input_offset(592) 7628 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7629 } 7630 } 7631 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3,zero)7632 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3, zero) { 7633 TEST_REQUIRES_X86_FMA3; 7634 for (uint32_t mz = 0; mz < 3; mz++) { 7635 for (uint32_t channels = 64; channels < 512; channels += 96) { 7636 DWConvMicrokernelTester() 7637 .cr(32) 7638 .kr(3) 7639 .channels(channels) 7640 .input_offset(592) 7641 .zero_index(mz) 7642 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3, xnn_init_f16_minmax_avx_params); 7643 } 7644 } 7645 } 7646 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 7647 7648 7649 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_eq_32)7650 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_eq_32) { 7651 TEST_REQUIRES_X86_FMA3; 7652 DWConvMicrokernelTester() 7653 .cr(32) 7654 .kr(3) 7655 .channels(32) 7656 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7657 } 7658 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_div_32)7659 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_div_32) { 7660 TEST_REQUIRES_X86_FMA3; 7661 for (uint32_t channels = 64; channels < 512; channels += 96) { 7662 DWConvMicrokernelTester() 7663 .cr(32) 7664 .kr(3) 7665 .channels(channels) 7666 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7667 } 7668 } 7669 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_div_32_with_qmin)7670 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_div_32_with_qmin) { 7671 TEST_REQUIRES_X86_FMA3; 7672 for (uint32_t channels = 64; channels < 512; channels += 96) { 7673 
DWConvMicrokernelTester() 7674 .cr(32) 7675 .kr(3) 7676 .channels(channels) 7677 .qmin(128) 7678 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7679 } 7680 } 7681 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_div_32_with_qmax)7682 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_div_32_with_qmax) { 7683 TEST_REQUIRES_X86_FMA3; 7684 for (uint32_t channels = 64; channels < 512; channels += 96) { 7685 DWConvMicrokernelTester() 7686 .cr(32) 7687 .kr(3) 7688 .channels(channels) 7689 .qmax(128) 7690 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7691 } 7692 } 7693 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_lt_32)7694 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_lt_32) { 7695 TEST_REQUIRES_X86_FMA3; 7696 for (uint32_t channels = 1; channels < 32; channels++) { 7697 DWConvMicrokernelTester() 7698 .cr(32) 7699 .kr(3) 7700 .channels(channels) 7701 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7702 } 7703 } 7704 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_gt_32)7705 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_gt_32) { 7706 TEST_REQUIRES_X86_FMA3; 7707 for (uint32_t channels = 33; channels < 64; channels++) { 7708 DWConvMicrokernelTester() 7709 .cr(32) 7710 .kr(3) 7711 .channels(channels) 7712 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7713 } 7714 } 7715 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_gt_32_with_qmin)7716 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_gt_32_with_qmin) { 7717 TEST_REQUIRES_X86_FMA3; 7718 for (uint32_t channels = 33; channels < 64; channels++) { 7719 DWConvMicrokernelTester() 7720 .cr(32) 7721 .kr(3) 7722 .channels(channels) 7723 .qmin(128) 7724 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7725 } 7726 } 7727 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,c_gt_32_with_qmax)7728 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, c_gt_32_with_qmax) { 7729 
TEST_REQUIRES_X86_FMA3; 7730 for (uint32_t channels = 33; channels < 64; channels++) { 7731 DWConvMicrokernelTester() 7732 .cr(32) 7733 .kr(3) 7734 .channels(channels) 7735 .qmax(128) 7736 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7737 } 7738 } 7739 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,multipixel)7740 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel) { 7741 TEST_REQUIRES_X86_FMA3; 7742 for (size_t channels = 1; channels <= 160; channels += 31) { 7743 DWConvMicrokernelTester() 7744 .cr(32) 7745 .kr(3) 7746 .channels(channels) 7747 .width(3) 7748 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7749 } 7750 } 7751 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,multipixel_with_step)7752 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_step) { 7753 TEST_REQUIRES_X86_FMA3; 7754 for (size_t channels = 1; channels <= 160; channels += 31) { 7755 for (size_t step = 2; step <= 3; step++) { 7756 DWConvMicrokernelTester() 7757 .cr(32) 7758 .kr(3) 7759 .channels(channels) 7760 .width(3) 7761 .step(step) 7762 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7763 } 7764 } 7765 } 7766 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,multipixel_with_output_stride)7767 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_output_stride) { 7768 TEST_REQUIRES_X86_FMA3; 7769 for (size_t channels = 1; channels <= 160; channels += 31) { 7770 DWConvMicrokernelTester() 7771 .cr(32) 7772 .kr(3) 7773 .channels(32) 7774 .width(5) 7775 .output_stride(163) 7776 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7777 } 7778 } 7779 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,multipixel_with_qmin)7780 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_qmin) { 7781 TEST_REQUIRES_X86_FMA3; 7782 for (size_t channels = 1; channels <= 160; channels += 31) { 7783 DWConvMicrokernelTester() 7784 .cr(32) 7785 .kr(3) 7786 
.channels(channels) 7787 .width(3) 7788 .qmin(128) 7789 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7790 } 7791 } 7792 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,multipixel_with_qmax)7793 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, multipixel_with_qmax) { 7794 TEST_REQUIRES_X86_FMA3; 7795 for (size_t channels = 1; channels <= 160; channels += 31) { 7796 DWConvMicrokernelTester() 7797 .cr(32) 7798 .kr(3) 7799 .channels(channels) 7800 .width(3) 7801 .qmax(128) 7802 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7803 } 7804 } 7805 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,input_offset)7806 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, input_offset) { 7807 TEST_REQUIRES_X86_FMA3; 7808 for (uint32_t channels = 64; channels < 512; channels += 96) { 7809 DWConvMicrokernelTester() 7810 .cr(32) 7811 .kr(3) 7812 .channels(channels) 7813 .input_offset(592) 7814 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7815 } 7816 } 7817 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2,zero)7818 TEST(F16_DWCONV_MINMAX_UP32X3__FMA3_ACC2, zero) { 7819 TEST_REQUIRES_X86_FMA3; 7820 for (uint32_t mz = 0; mz < 3; mz++) { 7821 for (uint32_t channels = 64; channels < 512; channels += 96) { 7822 DWConvMicrokernelTester() 7823 .cr(32) 7824 .kr(3) 7825 .channels(channels) 7826 .input_offset(592) 7827 .zero_index(mz) 7828 .Test(xnn_f16_dwconv_minmax_ukernel_up32x3__fma3_acc2, xnn_init_f16_minmax_avx_params); 7829 } 7830 } 7831 } 7832 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 7833 7834 7835 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_eq_32)7836 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_eq_32) { 7837 TEST_REQUIRES_X86_FMA3; 7838 DWConvMicrokernelTester() 7839 .cr(32) 7840 .kr(4) 7841 .channels(32) 7842 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7843 } 7844 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_div_32)7845 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32) { 7846 TEST_REQUIRES_X86_FMA3; 7847 for (uint32_t channels = 64; channels < 512; channels += 96) { 7848 DWConvMicrokernelTester() 7849 .cr(32) 7850 .kr(4) 7851 .channels(channels) 7852 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7853 } 7854 } 7855 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_div_32_with_qmin)7856 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32_with_qmin) { 7857 TEST_REQUIRES_X86_FMA3; 7858 for (uint32_t channels = 64; channels < 512; channels += 96) { 7859 DWConvMicrokernelTester() 7860 .cr(32) 7861 .kr(4) 7862 .channels(channels) 7863 .qmin(128) 7864 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7865 } 7866 } 7867 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_div_32_with_qmax)7868 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_div_32_with_qmax) { 7869 TEST_REQUIRES_X86_FMA3; 7870 for (uint32_t channels = 64; channels < 512; channels += 96) { 7871 DWConvMicrokernelTester() 7872 .cr(32) 7873 .kr(4) 7874 .channels(channels) 7875 .qmax(128) 7876 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7877 } 7878 } 7879 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_lt_32)7880 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_lt_32) { 7881 TEST_REQUIRES_X86_FMA3; 7882 for (uint32_t channels = 1; channels < 32; channels++) { 7883 DWConvMicrokernelTester() 7884 .cr(32) 7885 .kr(4) 7886 .channels(channels) 7887 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7888 } 7889 } 7890 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_gt_32)7891 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32) { 7892 TEST_REQUIRES_X86_FMA3; 7893 for (uint32_t channels = 33; channels < 64; channels++) { 7894 DWConvMicrokernelTester() 7895 .cr(32) 7896 .kr(4) 7897 .channels(channels) 7898 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7899 } 7900 } 7901 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_gt_32_with_qmin)7902 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32_with_qmin) { 7903 TEST_REQUIRES_X86_FMA3; 7904 for (uint32_t channels = 33; channels < 64; channels++) { 7905 DWConvMicrokernelTester() 7906 .cr(32) 7907 .kr(4) 7908 .channels(channels) 7909 .qmin(128) 7910 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7911 } 7912 } 7913 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,c_gt_32_with_qmax)7914 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, c_gt_32_with_qmax) { 7915 TEST_REQUIRES_X86_FMA3; 7916 for (uint32_t channels = 33; channels < 64; channels++) { 7917 DWConvMicrokernelTester() 7918 .cr(32) 7919 .kr(4) 7920 .channels(channels) 7921 .qmax(128) 7922 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7923 } 7924 } 7925 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,multipixel)7926 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel) { 7927 TEST_REQUIRES_X86_FMA3; 7928 for (size_t channels = 1; channels <= 160; channels += 31) { 7929 DWConvMicrokernelTester() 7930 .cr(32) 7931 .kr(4) 7932 .channels(channels) 7933 .width(3) 7934 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7935 } 7936 } 7937 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,multipixel_with_step)7938 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_step) { 7939 TEST_REQUIRES_X86_FMA3; 7940 for (size_t channels = 1; channels <= 160; channels += 31) { 7941 for (size_t step = 2; step <= 4; step++) { 7942 DWConvMicrokernelTester() 7943 .cr(32) 7944 .kr(4) 7945 .channels(channels) 7946 .width(3) 7947 .step(step) 7948 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7949 } 7950 } 7951 } 7952 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,multipixel_with_output_stride)7953 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_output_stride) { 7954 TEST_REQUIRES_X86_FMA3; 7955 for (size_t channels = 1; channels <= 160; channels += 31) { 7956 DWConvMicrokernelTester() 7957 .cr(32) 7958 .kr(4) 7959 .channels(32) 7960 .width(5) 7961 
.output_stride(163) 7962 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7963 } 7964 } 7965 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,multipixel_with_qmin)7966 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_qmin) { 7967 TEST_REQUIRES_X86_FMA3; 7968 for (size_t channels = 1; channels <= 160; channels += 31) { 7969 DWConvMicrokernelTester() 7970 .cr(32) 7971 .kr(4) 7972 .channels(channels) 7973 .width(3) 7974 .qmin(128) 7975 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7976 } 7977 } 7978 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,multipixel_with_qmax)7979 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, multipixel_with_qmax) { 7980 TEST_REQUIRES_X86_FMA3; 7981 for (size_t channels = 1; channels <= 160; channels += 31) { 7982 DWConvMicrokernelTester() 7983 .cr(32) 7984 .kr(4) 7985 .channels(channels) 7986 .width(3) 7987 .qmax(128) 7988 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 7989 } 7990 } 7991 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,input_offset)7992 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, input_offset) { 7993 TEST_REQUIRES_X86_FMA3; 7994 for (uint32_t channels = 64; channels < 512; channels += 96) { 7995 DWConvMicrokernelTester() 7996 .cr(32) 7997 .kr(4) 7998 .channels(channels) 7999 .input_offset(592) 8000 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 8001 } 8002 } 8003 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3,zero)8004 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3, zero) { 8005 TEST_REQUIRES_X86_FMA3; 8006 for (uint32_t mz = 0; mz < 4; mz++) { 8007 for (uint32_t channels = 64; channels < 512; channels += 96) { 8008 DWConvMicrokernelTester() 8009 .cr(32) 8010 .kr(4) 8011 .channels(channels) 8012 .input_offset(592) 8013 .zero_index(mz) 8014 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3, xnn_init_f16_minmax_avx_params); 8015 } 8016 } 8017 } 8018 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 8019 8020 8021 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_eq_32)8022 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_eq_32) { 8023 TEST_REQUIRES_X86_FMA3; 8024 DWConvMicrokernelTester() 8025 .cr(32) 8026 .kr(4) 8027 .channels(32) 8028 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8029 } 8030 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_div_32)8031 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32) { 8032 TEST_REQUIRES_X86_FMA3; 8033 for (uint32_t channels = 64; channels < 512; channels += 96) { 8034 DWConvMicrokernelTester() 8035 .cr(32) 8036 .kr(4) 8037 .channels(channels) 8038 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8039 } 8040 } 8041 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_div_32_with_qmin)8042 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32_with_qmin) { 8043 TEST_REQUIRES_X86_FMA3; 8044 for (uint32_t channels = 64; channels < 512; channels += 96) { 8045 DWConvMicrokernelTester() 8046 .cr(32) 8047 .kr(4) 8048 .channels(channels) 8049 .qmin(128) 8050 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8051 } 8052 } 8053 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_div_32_with_qmax)8054 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_div_32_with_qmax) { 8055 TEST_REQUIRES_X86_FMA3; 8056 for (uint32_t channels = 64; channels < 512; channels += 96) { 8057 DWConvMicrokernelTester() 8058 .cr(32) 8059 .kr(4) 8060 .channels(channels) 8061 .qmax(128) 8062 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8063 } 8064 } 8065 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_lt_32)8066 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_lt_32) { 8067 TEST_REQUIRES_X86_FMA3; 8068 for (uint32_t channels = 1; channels < 32; channels++) { 8069 DWConvMicrokernelTester() 8070 .cr(32) 8071 .kr(4) 8072 .channels(channels) 8073 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8074 } 8075 } 8076 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_gt_32)8077 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32) { 8078 TEST_REQUIRES_X86_FMA3; 8079 for (uint32_t channels = 33; channels < 64; channels++) { 8080 DWConvMicrokernelTester() 8081 .cr(32) 8082 .kr(4) 8083 .channels(channels) 8084 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8085 } 8086 } 8087 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_gt_32_with_qmin)8088 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32_with_qmin) { 8089 TEST_REQUIRES_X86_FMA3; 8090 for (uint32_t channels = 33; channels < 64; channels++) { 8091 DWConvMicrokernelTester() 8092 .cr(32) 8093 .kr(4) 8094 .channels(channels) 8095 .qmin(128) 8096 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8097 } 8098 } 8099 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,c_gt_32_with_qmax)8100 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, c_gt_32_with_qmax) { 8101 TEST_REQUIRES_X86_FMA3; 8102 for (uint32_t channels = 33; channels < 64; channels++) { 8103 DWConvMicrokernelTester() 8104 .cr(32) 8105 .kr(4) 8106 .channels(channels) 8107 .qmax(128) 8108 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8109 } 8110 } 8111 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,multipixel)8112 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel) { 8113 TEST_REQUIRES_X86_FMA3; 8114 for (size_t channels = 1; channels <= 160; channels += 31) { 8115 DWConvMicrokernelTester() 8116 .cr(32) 8117 .kr(4) 8118 .channels(channels) 8119 .width(3) 8120 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8121 } 8122 } 8123 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,multipixel_with_step)8124 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_step) { 8125 TEST_REQUIRES_X86_FMA3; 8126 for (size_t channels = 1; channels <= 160; channels += 31) { 8127 for (size_t step = 2; step <= 4; step++) { 8128 DWConvMicrokernelTester() 8129 .cr(32) 8130 .kr(4) 
8131 .channels(channels) 8132 .width(3) 8133 .step(step) 8134 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8135 } 8136 } 8137 } 8138 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,multipixel_with_output_stride)8139 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_output_stride) { 8140 TEST_REQUIRES_X86_FMA3; 8141 for (size_t channels = 1; channels <= 160; channels += 31) { 8142 DWConvMicrokernelTester() 8143 .cr(32) 8144 .kr(4) 8145 .channels(32) 8146 .width(5) 8147 .output_stride(163) 8148 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8149 } 8150 } 8151 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,multipixel_with_qmin)8152 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_qmin) { 8153 TEST_REQUIRES_X86_FMA3; 8154 for (size_t channels = 1; channels <= 160; channels += 31) { 8155 DWConvMicrokernelTester() 8156 .cr(32) 8157 .kr(4) 8158 .channels(channels) 8159 .width(3) 8160 .qmin(128) 8161 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8162 } 8163 } 8164 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,multipixel_with_qmax)8165 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, multipixel_with_qmax) { 8166 TEST_REQUIRES_X86_FMA3; 8167 for (size_t channels = 1; channels <= 160; channels += 31) { 8168 DWConvMicrokernelTester() 8169 .cr(32) 8170 .kr(4) 8171 .channels(channels) 8172 .width(3) 8173 .qmax(128) 8174 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8175 } 8176 } 8177 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,input_offset)8178 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, input_offset) { 8179 TEST_REQUIRES_X86_FMA3; 8180 for (uint32_t channels = 64; channels < 512; channels += 96) { 8181 DWConvMicrokernelTester() 8182 .cr(32) 8183 .kr(4) 8184 .channels(channels) 8185 .input_offset(592) 8186 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8187 } 8188 } 8189 
TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2,zero)8190 TEST(F16_DWCONV_MINMAX_UP32X4__FMA3_ACC2, zero) { 8191 TEST_REQUIRES_X86_FMA3; 8192 for (uint32_t mz = 0; mz < 4; mz++) { 8193 for (uint32_t channels = 64; channels < 512; channels += 96) { 8194 DWConvMicrokernelTester() 8195 .cr(32) 8196 .kr(4) 8197 .channels(channels) 8198 .input_offset(592) 8199 .zero_index(mz) 8200 .Test(xnn_f16_dwconv_minmax_ukernel_up32x4__fma3_acc2, xnn_init_f16_minmax_avx_params); 8201 } 8202 } 8203 } 8204 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 8205 8206 8207 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_eq_32)8208 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_eq_32) { 8209 TEST_REQUIRES_X86_FMA3; 8210 DWConvMicrokernelTester() 8211 .cr(32) 8212 .kr(9) 8213 .channels(32) 8214 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8215 } 8216 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_div_32)8217 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32) { 8218 TEST_REQUIRES_X86_FMA3; 8219 for (uint32_t channels = 64; channels < 512; channels += 96) { 8220 DWConvMicrokernelTester() 8221 .cr(32) 8222 .kr(9) 8223 .channels(channels) 8224 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8225 } 8226 } 8227 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_div_32_with_qmin)8228 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32_with_qmin) { 8229 TEST_REQUIRES_X86_FMA3; 8230 for (uint32_t channels = 64; channels < 512; channels += 96) { 8231 DWConvMicrokernelTester() 8232 .cr(32) 8233 .kr(9) 8234 .channels(channels) 8235 .qmin(128) 8236 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8237 } 8238 } 8239 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_div_32_with_qmax)8240 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_div_32_with_qmax) { 8241 TEST_REQUIRES_X86_FMA3; 8242 for (uint32_t channels = 64; channels < 512; channels += 96) { 8243 DWConvMicrokernelTester() 8244 .cr(32) 8245 .kr(9) 8246 .channels(channels) 8247 .qmax(128) 
8248 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8249 } 8250 } 8251 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_lt_32)8252 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_lt_32) { 8253 TEST_REQUIRES_X86_FMA3; 8254 for (uint32_t channels = 1; channels < 32; channels++) { 8255 DWConvMicrokernelTester() 8256 .cr(32) 8257 .kr(9) 8258 .channels(channels) 8259 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8260 } 8261 } 8262 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_gt_32)8263 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32) { 8264 TEST_REQUIRES_X86_FMA3; 8265 for (uint32_t channels = 33; channels < 64; channels++) { 8266 DWConvMicrokernelTester() 8267 .cr(32) 8268 .kr(9) 8269 .channels(channels) 8270 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8271 } 8272 } 8273 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_gt_32_with_qmin)8274 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32_with_qmin) { 8275 TEST_REQUIRES_X86_FMA3; 8276 for (uint32_t channels = 33; channels < 64; channels++) { 8277 DWConvMicrokernelTester() 8278 .cr(32) 8279 .kr(9) 8280 .channels(channels) 8281 .qmin(128) 8282 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8283 } 8284 } 8285 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,c_gt_32_with_qmax)8286 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, c_gt_32_with_qmax) { 8287 TEST_REQUIRES_X86_FMA3; 8288 for (uint32_t channels = 33; channels < 64; channels++) { 8289 DWConvMicrokernelTester() 8290 .cr(32) 8291 .kr(9) 8292 .channels(channels) 8293 .qmax(128) 8294 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8295 } 8296 } 8297 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,multipixel)8298 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel) { 8299 TEST_REQUIRES_X86_FMA3; 8300 for (size_t channels = 1; channels <= 160; channels += 31) { 8301 DWConvMicrokernelTester() 8302 .cr(32) 8303 .kr(9) 8304 .channels(channels) 8305 .width(3) 8306 
.Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8307 } 8308 } 8309 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,multipixel_with_step)8310 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_step) { 8311 TEST_REQUIRES_X86_FMA3; 8312 for (size_t channels = 1; channels <= 160; channels += 31) { 8313 for (size_t step = 2; step <= 9; step++) { 8314 DWConvMicrokernelTester() 8315 .cr(32) 8316 .kr(9) 8317 .channels(channels) 8318 .width(3) 8319 .step(step) 8320 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8321 } 8322 } 8323 } 8324 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,multipixel_with_output_stride)8325 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_output_stride) { 8326 TEST_REQUIRES_X86_FMA3; 8327 for (size_t channels = 1; channels <= 160; channels += 31) { 8328 DWConvMicrokernelTester() 8329 .cr(32) 8330 .kr(9) 8331 .channels(32) 8332 .width(5) 8333 .output_stride(163) 8334 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8335 } 8336 } 8337 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,multipixel_with_qmin)8338 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_qmin) { 8339 TEST_REQUIRES_X86_FMA3; 8340 for (size_t channels = 1; channels <= 160; channels += 31) { 8341 DWConvMicrokernelTester() 8342 .cr(32) 8343 .kr(9) 8344 .channels(channels) 8345 .width(3) 8346 .qmin(128) 8347 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8348 } 8349 } 8350 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,multipixel_with_qmax)8351 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, multipixel_with_qmax) { 8352 TEST_REQUIRES_X86_FMA3; 8353 for (size_t channels = 1; channels <= 160; channels += 31) { 8354 DWConvMicrokernelTester() 8355 .cr(32) 8356 .kr(9) 8357 .channels(channels) 8358 .width(3) 8359 .qmax(128) 8360 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8361 } 8362 } 8363 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,input_offset)8364 
TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, input_offset) { 8365 TEST_REQUIRES_X86_FMA3; 8366 for (uint32_t channels = 64; channels < 512; channels += 96) { 8367 DWConvMicrokernelTester() 8368 .cr(32) 8369 .kr(9) 8370 .channels(channels) 8371 .input_offset(592) 8372 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8373 } 8374 } 8375 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3,zero)8376 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3, zero) { 8377 TEST_REQUIRES_X86_FMA3; 8378 for (uint32_t mz = 0; mz < 9; mz++) { 8379 for (uint32_t channels = 64; channels < 512; channels += 96) { 8380 DWConvMicrokernelTester() 8381 .cr(32) 8382 .kr(9) 8383 .channels(channels) 8384 .input_offset(592) 8385 .zero_index(mz) 8386 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3, xnn_init_f16_minmax_avx_params); 8387 } 8388 } 8389 } 8390 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 8391 8392 8393 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_eq_32)8394 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_eq_32) { 8395 TEST_REQUIRES_X86_FMA3; 8396 DWConvMicrokernelTester() 8397 .cr(32) 8398 .kr(9) 8399 .channels(32) 8400 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8401 } 8402 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_div_32)8403 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32) { 8404 TEST_REQUIRES_X86_FMA3; 8405 for (uint32_t channels = 64; channels < 512; channels += 96) { 8406 DWConvMicrokernelTester() 8407 .cr(32) 8408 .kr(9) 8409 .channels(channels) 8410 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8411 } 8412 } 8413 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_div_32_with_qmin)8414 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32_with_qmin) { 8415 TEST_REQUIRES_X86_FMA3; 8416 for (uint32_t channels = 64; channels < 512; channels += 96) { 8417 DWConvMicrokernelTester() 8418 .cr(32) 8419 .kr(9) 8420 .channels(channels) 8421 .qmin(128) 8422 
.Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8423 } 8424 } 8425 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_div_32_with_qmax)8426 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_div_32_with_qmax) { 8427 TEST_REQUIRES_X86_FMA3; 8428 for (uint32_t channels = 64; channels < 512; channels += 96) { 8429 DWConvMicrokernelTester() 8430 .cr(32) 8431 .kr(9) 8432 .channels(channels) 8433 .qmax(128) 8434 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8435 } 8436 } 8437 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_lt_32)8438 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_lt_32) { 8439 TEST_REQUIRES_X86_FMA3; 8440 for (uint32_t channels = 1; channels < 32; channels++) { 8441 DWConvMicrokernelTester() 8442 .cr(32) 8443 .kr(9) 8444 .channels(channels) 8445 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8446 } 8447 } 8448 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_gt_32)8449 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32) { 8450 TEST_REQUIRES_X86_FMA3; 8451 for (uint32_t channels = 33; channels < 64; channels++) { 8452 DWConvMicrokernelTester() 8453 .cr(32) 8454 .kr(9) 8455 .channels(channels) 8456 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8457 } 8458 } 8459 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_gt_32_with_qmin)8460 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32_with_qmin) { 8461 TEST_REQUIRES_X86_FMA3; 8462 for (uint32_t channels = 33; channels < 64; channels++) { 8463 DWConvMicrokernelTester() 8464 .cr(32) 8465 .kr(9) 8466 .channels(channels) 8467 .qmin(128) 8468 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8469 } 8470 } 8471 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,c_gt_32_with_qmax)8472 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, c_gt_32_with_qmax) { 8473 TEST_REQUIRES_X86_FMA3; 8474 for (uint32_t channels = 33; channels < 64; channels++) { 8475 
DWConvMicrokernelTester() 8476 .cr(32) 8477 .kr(9) 8478 .channels(channels) 8479 .qmax(128) 8480 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8481 } 8482 } 8483 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,multipixel)8484 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel) { 8485 TEST_REQUIRES_X86_FMA3; 8486 for (size_t channels = 1; channels <= 160; channels += 31) { 8487 DWConvMicrokernelTester() 8488 .cr(32) 8489 .kr(9) 8490 .channels(channels) 8491 .width(3) 8492 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8493 } 8494 } 8495 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,multipixel_with_step)8496 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_step) { 8497 TEST_REQUIRES_X86_FMA3; 8498 for (size_t channels = 1; channels <= 160; channels += 31) { 8499 for (size_t step = 2; step <= 9; step++) { 8500 DWConvMicrokernelTester() 8501 .cr(32) 8502 .kr(9) 8503 .channels(channels) 8504 .width(3) 8505 .step(step) 8506 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8507 } 8508 } 8509 } 8510 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,multipixel_with_output_stride)8511 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_output_stride) { 8512 TEST_REQUIRES_X86_FMA3; 8513 for (size_t channels = 1; channels <= 160; channels += 31) { 8514 DWConvMicrokernelTester() 8515 .cr(32) 8516 .kr(9) 8517 .channels(32) 8518 .width(5) 8519 .output_stride(163) 8520 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8521 } 8522 } 8523 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,multipixel_with_qmin)8524 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_qmin) { 8525 TEST_REQUIRES_X86_FMA3; 8526 for (size_t channels = 1; channels <= 160; channels += 31) { 8527 DWConvMicrokernelTester() 8528 .cr(32) 8529 .kr(9) 8530 .channels(channels) 8531 .width(3) 8532 .qmin(128) 8533 
.Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8534 } 8535 } 8536 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,multipixel_with_qmax)8537 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, multipixel_with_qmax) { 8538 TEST_REQUIRES_X86_FMA3; 8539 for (size_t channels = 1; channels <= 160; channels += 31) { 8540 DWConvMicrokernelTester() 8541 .cr(32) 8542 .kr(9) 8543 .channels(channels) 8544 .width(3) 8545 .qmax(128) 8546 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8547 } 8548 } 8549 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,input_offset)8550 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, input_offset) { 8551 TEST_REQUIRES_X86_FMA3; 8552 for (uint32_t channels = 64; channels < 512; channels += 96) { 8553 DWConvMicrokernelTester() 8554 .cr(32) 8555 .kr(9) 8556 .channels(channels) 8557 .input_offset(592) 8558 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8559 } 8560 } 8561 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2,zero)8562 TEST(F16_DWCONV_MINMAX_UP32X9__FMA3_ACC2, zero) { 8563 TEST_REQUIRES_X86_FMA3; 8564 for (uint32_t mz = 0; mz < 9; mz++) { 8565 for (uint32_t channels = 64; channels < 512; channels += 96) { 8566 DWConvMicrokernelTester() 8567 .cr(32) 8568 .kr(9) 8569 .channels(channels) 8570 .input_offset(592) 8571 .zero_index(mz) 8572 .Test(xnn_f16_dwconv_minmax_ukernel_up32x9__fma3_acc2, xnn_init_f16_minmax_avx_params); 8573 } 8574 } 8575 } 8576 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 8577 8578 8579 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_eq_32)8580 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_eq_32) { 8581 TEST_REQUIRES_X86_FMA3; 8582 DWConvMicrokernelTester() 8583 .cr(32) 8584 .kr(25) 8585 .channels(32) 8586 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8587 } 8588 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_div_32)8589 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32) { 8590 
TEST_REQUIRES_X86_FMA3; 8591 for (uint32_t channels = 64; channels < 512; channels += 96) { 8592 DWConvMicrokernelTester() 8593 .cr(32) 8594 .kr(25) 8595 .channels(channels) 8596 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8597 } 8598 } 8599 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_div_32_with_qmin)8600 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32_with_qmin) { 8601 TEST_REQUIRES_X86_FMA3; 8602 for (uint32_t channels = 64; channels < 512; channels += 96) { 8603 DWConvMicrokernelTester() 8604 .cr(32) 8605 .kr(25) 8606 .channels(channels) 8607 .qmin(128) 8608 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8609 } 8610 } 8611 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_div_32_with_qmax)8612 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_div_32_with_qmax) { 8613 TEST_REQUIRES_X86_FMA3; 8614 for (uint32_t channels = 64; channels < 512; channels += 96) { 8615 DWConvMicrokernelTester() 8616 .cr(32) 8617 .kr(25) 8618 .channels(channels) 8619 .qmax(128) 8620 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8621 } 8622 } 8623 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_lt_32)8624 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_lt_32) { 8625 TEST_REQUIRES_X86_FMA3; 8626 for (uint32_t channels = 1; channels < 32; channels++) { 8627 DWConvMicrokernelTester() 8628 .cr(32) 8629 .kr(25) 8630 .channels(channels) 8631 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8632 } 8633 } 8634 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_gt_32)8635 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32) { 8636 TEST_REQUIRES_X86_FMA3; 8637 for (uint32_t channels = 33; channels < 64; channels++) { 8638 DWConvMicrokernelTester() 8639 .cr(32) 8640 .kr(25) 8641 .channels(channels) 8642 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8643 } 8644 } 8645 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_gt_32_with_qmin)8646 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, 
c_gt_32_with_qmin) { 8647 TEST_REQUIRES_X86_FMA3; 8648 for (uint32_t channels = 33; channels < 64; channels++) { 8649 DWConvMicrokernelTester() 8650 .cr(32) 8651 .kr(25) 8652 .channels(channels) 8653 .qmin(128) 8654 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8655 } 8656 } 8657 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,c_gt_32_with_qmax)8658 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, c_gt_32_with_qmax) { 8659 TEST_REQUIRES_X86_FMA3; 8660 for (uint32_t channels = 33; channels < 64; channels++) { 8661 DWConvMicrokernelTester() 8662 .cr(32) 8663 .kr(25) 8664 .channels(channels) 8665 .qmax(128) 8666 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8667 } 8668 } 8669 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,multipixel)8670 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel) { 8671 TEST_REQUIRES_X86_FMA3; 8672 for (size_t channels = 1; channels <= 160; channels += 31) { 8673 DWConvMicrokernelTester() 8674 .cr(32) 8675 .kr(25) 8676 .channels(channels) 8677 .width(3) 8678 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8679 } 8680 } 8681 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,multipixel_with_step)8682 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_step) { 8683 TEST_REQUIRES_X86_FMA3; 8684 for (size_t channels = 1; channels <= 160; channels += 31) { 8685 for (size_t step = 2; step <= 25; step++) { 8686 DWConvMicrokernelTester() 8687 .cr(32) 8688 .kr(25) 8689 .channels(channels) 8690 .width(3) 8691 .step(step) 8692 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8693 } 8694 } 8695 } 8696 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,multipixel_with_output_stride)8697 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_output_stride) { 8698 TEST_REQUIRES_X86_FMA3; 8699 for (size_t channels = 1; channels <= 160; channels += 31) { 8700 DWConvMicrokernelTester() 8701 .cr(32) 8702 .kr(25) 8703 .channels(32) 8704 .width(5) 8705 .output_stride(163) 8706 
.Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8707 } 8708 } 8709 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,multipixel_with_qmin)8710 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_qmin) { 8711 TEST_REQUIRES_X86_FMA3; 8712 for (size_t channels = 1; channels <= 160; channels += 31) { 8713 DWConvMicrokernelTester() 8714 .cr(32) 8715 .kr(25) 8716 .channels(channels) 8717 .width(3) 8718 .qmin(128) 8719 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8720 } 8721 } 8722 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,multipixel_with_qmax)8723 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, multipixel_with_qmax) { 8724 TEST_REQUIRES_X86_FMA3; 8725 for (size_t channels = 1; channels <= 160; channels += 31) { 8726 DWConvMicrokernelTester() 8727 .cr(32) 8728 .kr(25) 8729 .channels(channels) 8730 .width(3) 8731 .qmax(128) 8732 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8733 } 8734 } 8735 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,input_offset)8736 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, input_offset) { 8737 TEST_REQUIRES_X86_FMA3; 8738 for (uint32_t channels = 64; channels < 512; channels += 96) { 8739 DWConvMicrokernelTester() 8740 .cr(32) 8741 .kr(25) 8742 .channels(channels) 8743 .input_offset(592) 8744 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8745 } 8746 } 8747 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3,zero)8748 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3, zero) { 8749 TEST_REQUIRES_X86_FMA3; 8750 for (uint32_t mz = 0; mz < 25; mz++) { 8751 for (uint32_t channels = 64; channels < 512; channels += 96) { 8752 DWConvMicrokernelTester() 8753 .cr(32) 8754 .kr(25) 8755 .channels(channels) 8756 .input_offset(592) 8757 .zero_index(mz) 8758 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3, xnn_init_f16_minmax_avx_params); 8759 } 8760 } 8761 } 8762 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 8763 8764 8765 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 
// Single run with exactly 32 channels, matching .cr(32).
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_eq_32) {
  TEST_REQUIRES_X86_FMA3;
  DWConvMicrokernelTester().cr(32).kr(25).channels(32)
    .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
}

// Sweeps channel counts 64, 160, 256, 352, 448 (all multiples of 32).
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// Same multiples-of-32 sweep, additionally setting qmin(128).
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// Same multiples-of-32 sweep, additionally setting qmax(128).
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}

// Runs every channel count below 32 (1 through 31).
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_lt_32) {
  TEST_REQUIRES_X86_FMA3;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params);
  }
}
TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,c_gt_32)8821 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32) { 8822 TEST_REQUIRES_X86_FMA3; 8823 for (uint32_t channels = 33; channels < 64; channels++) { 8824 DWConvMicrokernelTester() 8825 .cr(32) 8826 .kr(25) 8827 .channels(channels) 8828 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8829 } 8830 } 8831 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,c_gt_32_with_qmin)8832 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32_with_qmin) { 8833 TEST_REQUIRES_X86_FMA3; 8834 for (uint32_t channels = 33; channels < 64; channels++) { 8835 DWConvMicrokernelTester() 8836 .cr(32) 8837 .kr(25) 8838 .channels(channels) 8839 .qmin(128) 8840 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8841 } 8842 } 8843 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,c_gt_32_with_qmax)8844 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, c_gt_32_with_qmax) { 8845 TEST_REQUIRES_X86_FMA3; 8846 for (uint32_t channels = 33; channels < 64; channels++) { 8847 DWConvMicrokernelTester() 8848 .cr(32) 8849 .kr(25) 8850 .channels(channels) 8851 .qmax(128) 8852 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8853 } 8854 } 8855 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,multipixel)8856 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel) { 8857 TEST_REQUIRES_X86_FMA3; 8858 for (size_t channels = 1; channels <= 160; channels += 31) { 8859 DWConvMicrokernelTester() 8860 .cr(32) 8861 .kr(25) 8862 .channels(channels) 8863 .width(3) 8864 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8865 } 8866 } 8867 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,multipixel_with_step)8868 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_step) { 8869 TEST_REQUIRES_X86_FMA3; 8870 for (size_t channels = 1; channels <= 160; channels += 31) { 8871 for (size_t step = 2; step <= 25; step++) { 8872 DWConvMicrokernelTester() 8873 
.cr(32) 8874 .kr(25) 8875 .channels(channels) 8876 .width(3) 8877 .step(step) 8878 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8879 } 8880 } 8881 } 8882 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,multipixel_with_output_stride)8883 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_output_stride) { 8884 TEST_REQUIRES_X86_FMA3; 8885 for (size_t channels = 1; channels <= 160; channels += 31) { 8886 DWConvMicrokernelTester() 8887 .cr(32) 8888 .kr(25) 8889 .channels(32) 8890 .width(5) 8891 .output_stride(163) 8892 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8893 } 8894 } 8895 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,multipixel_with_qmin)8896 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_qmin) { 8897 TEST_REQUIRES_X86_FMA3; 8898 for (size_t channels = 1; channels <= 160; channels += 31) { 8899 DWConvMicrokernelTester() 8900 .cr(32) 8901 .kr(25) 8902 .channels(channels) 8903 .width(3) 8904 .qmin(128) 8905 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8906 } 8907 } 8908 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,multipixel_with_qmax)8909 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, multipixel_with_qmax) { 8910 TEST_REQUIRES_X86_FMA3; 8911 for (size_t channels = 1; channels <= 160; channels += 31) { 8912 DWConvMicrokernelTester() 8913 .cr(32) 8914 .kr(25) 8915 .channels(channels) 8916 .width(3) 8917 .qmax(128) 8918 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8919 } 8920 } 8921 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,input_offset)8922 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, input_offset) { 8923 TEST_REQUIRES_X86_FMA3; 8924 for (uint32_t channels = 64; channels < 512; channels += 96) { 8925 DWConvMicrokernelTester() 8926 .cr(32) 8927 .kr(25) 8928 .channels(channels) 8929 .input_offset(592) 8930 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, 
xnn_init_f16_minmax_avx_params); 8931 } 8932 } 8933 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2,zero)8934 TEST(F16_DWCONV_MINMAX_UP32X25__FMA3_ACC2, zero) { 8935 TEST_REQUIRES_X86_FMA3; 8936 for (uint32_t mz = 0; mz < 25; mz++) { 8937 for (uint32_t channels = 64; channels < 512; channels += 96) { 8938 DWConvMicrokernelTester() 8939 .cr(32) 8940 .kr(25) 8941 .channels(channels) 8942 .input_offset(592) 8943 .zero_index(mz) 8944 .Test(xnn_f16_dwconv_minmax_ukernel_up32x25__fma3_acc2, xnn_init_f16_minmax_avx_params); 8945 } 8946 } 8947 } 8948 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 8949